##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(mlbench)
library(pls)
library(corrplot)
library(lares)
library(DMwR)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(ggpubr)
library(mda)
library(klaR)
library(pamr)
library(themis)
library(ROSE)
##################################
# Loading source and
# formulating the train set
##################################
# Build a deliberately imbalanced train set from the Sonar data: every "M" row
# is kept, only 25 randomly drawn "R" rows are kept, and the columns are
# restricted to the response plus predictors V1 and V11.
data(Sonar)
Sonar.Original <- Sonar
IsClassM <- Sonar[["Class"]] == "M"
IsClassR <- Sonar[["Class"]] == "R"
Sonar.M <- Sonar[IsClassM, ]
Sonar.R <- Sonar[IsClassR, ]
# Fixed seed so the same 25 "R" rows are drawn on every run.
set.seed(12345678)
RetainedRows <- sample(1:nrow(Sonar.R), 25)
Sonar.R.Reduced <- Sonar.R[RetainedRows, ]
Sonar <- as.data.frame(rbind(Sonar.M, Sonar.R.Reduced))
Sonar[["Class"]] <- factor(Sonar[["Class"]], levels = c("M", "R"))
TrainColumns <- c("Class", "V1", "V11")
Sonar_Train <- Sonar[, TrainColumns]
##################################
# Performing a general exploration of the train set
##################################
# Print the dimensions, the structure and a per-column summary of the train
# set. Text after "##" is console output recorded from an earlier run.
dim(Sonar_Train)## [1] 136 3
str(Sonar_Train)## 'data.frame': 136 obs. of 3 variables:
## $ Class: Factor w/ 2 levels "M","R": 1 1 1 1 1 1 1 1 1 1 ...
## $ V1 : num 0.0491 0.1313 0.0201 0.0629 0.0335 ...
## $ V11 : num 0.0947 0.2907 0.2251 0.5466 0.5533 ...
summary(Sonar_Train)## Class V1 V11
## M:111 Min. :0.00150 Min. :0.0523
## R: 25 1st Qu.:0.01550 1st Qu.:0.1780
## Median :0.02365 Median :0.2503
## Mean :0.03188 Mean :0.2642
## 3rd Qu.:0.03925 3rd Qu.:0.3222
## Max. :0.13710 Max. :0.7342
##################################
# Formulating a data type assessment summary
##################################
# Data type assessment: one row per column of the train set giving its
# position, name and (first) class.
PDA <- Sonar_Train
(PDA.Summary <- data.frame(
  # seq_along() yields an empty index for a zero-column frame, where
  # 1:length() would yield c(1, 0).
  Column.Index = seq_along(PDA),
  Column.Name = names(PDA),
  # vapply() guarantees a character vector even for multi-class columns.
  Column.Type = vapply(PDA, function(x) class(x)[1], character(1)),
  row.names = NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 Class factor
## 2 2 V1 numeric
## 3 3 V11 numeric
##################################
# Loading dataset
##################################
DQA <- Sonar_Train
##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column: position, name, (first) class, row count, number of
# missing values and the share of non-missing values formatted to 3 decimals.
(DQA.Summary <- data.frame(
  # seq_along() stays empty for a zero-column frame (1:length() would not).
  Column.Index = seq_along(DQA),
  Column.Name = names(DQA),
  Column.Type = vapply(DQA, function(x) class(x)[1], character(1)),
  Row.Count = rep(nrow(DQA), ncol(DQA)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  Fill.Rate = vapply(DQA,
                     function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
                     character(1)),
  row.names = NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 Class factor 136 0 1.000
## 2 2 V1 numeric 136 0 1.000
## 3 3 V11 numeric 136 0 1.000
##################################
# Listing all predictors
##################################
# Split the predictors (every column except "Class") by type and report how
# many of each type exist. drop = FALSE keeps a data frame even when only one
# column is selected; without it a single predictor collapses to a bare vector,
# names() returns NULL and the predictor is reported as absent.
DQA.Predictors <- DQA[, !names(DQA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# vapply() returns logical(0) for a zero-column frame (sapply() returns list()).
DQA.Predictors.Numeric <- DQA.Predictors[, vapply(DQA.Predictors, is.numeric, logical(1)), drop = FALSE]
if (ncol(DQA.Predictors.Numeric) > 0) {
  print(paste0("There are ",
               ncol(DQA.Predictors.Numeric),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 2 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
DQA.Predictors.Factor <- DQA.Predictors[, vapply(DQA.Predictors, is.factor, logical(1)), drop = FALSE]
if (ncol(DQA.Predictors.Factor) > 0) {
  print(paste0("There are ",
               ncol(DQA.Predictors.Factor),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are no factor predictor variables."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
# Data quality profile of the factor predictors; skipped when there are none.
if (length(names(DQA.Predictors.Factor))>0) {
  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns every distinct non-missing value of x that attains the highest
  # frequency (several values when there is a tie).
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }
  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) among the entries of x that are not a
  # first mode. Falls back to the placeholder "x" when no such value exists or
  # when a missing value is among the top candidates. The earlier
  # ifelse(test, return(), return()) form returned logical(0) for the
  # "no candidate" case because ifelse() evaluates neither branch on a
  # zero-length test.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    runner_up <- usm[tabsm == max(tabsm)]
    if (length(runner_up) == 0 || anyNA(runner_up)) {
      return("x")
    }
    runner_up
  }
  # One row per factor predictor. Counts compare the non-missing values with
  # the first reported first/second mode; ratios are formatted to 3 decimals.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count = sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names = NULL)
  )
}
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric))>0) {
##################################
# Formulating a function to determine the first mode
##################################
# Most frequent value(s) of x, ignoring missing entries. All tied values are
# returned, in order of first appearance.
FirstModes <- function(x) {
  distinct_values <- unique(na.omit(x))
  # match() maps each element to its slot in distinct_values; tabulate()
  # then counts occurrences per slot (missing entries are not counted).
  slot_of_each <- match(x, distinct_values)
  frequencies <- tabulate(slot_of_each)
  is_most_frequent <- frequencies == max(frequencies)
  distinct_values[is_most_frequent]
}
##################################
# Formulating a function to determine the second mode
##################################
# Most frequent value(s) among the non-missing entries of x that are not a
# first mode (all tied values are returned).
#
# Returns the sentinel 0.00001 when no such value exists, i.e. when every
# non-missing entry is a first mode or x has no non-missing entries. The
# earlier ifelse(test, return(), return()) form never reached that sentinel:
# on a zero-length test ifelse() evaluates neither branch, so the function
# returned logical(0).
SecondModes <- function(x) {
  observed <- na.omit(x)
  distinct_values <- unique(observed)
  frequencies <- tabulate(match(x, distinct_values))
  first_modes <- distinct_values[frequencies == max(frequencies)]
  remaining <- observed[!(observed %in% first_modes)]
  if (length(remaining) == 0) {
    return(0.00001)
  }
  candidates <- unique(remaining)
  candidate_counts <- tabulate(match(remaining, candidates))
  candidates[candidate_counts == max(candidate_counts)]
}
# One row per numeric predictor: type, uniqueness, first/second mode values and
# counts, and distribution statistics. Every formatted figure is rounded to 3
# decimals and rendered as text with at least 3 decimals.
DQA.Predictors.Numeric.Summary <- local({
  Formatted3 <- function(value) format(round(value, 3), nsmall = 3)
  # Number of non-missing entries of x equal to target.
  MatchCount <- function(x, target) sum(na.omit(x) == target)
  PerColumn <- function(statistic) sapply(DQA.Predictors.Numeric, statistic)
  data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = PerColumn(function(x) class(x)),
    Unique.Count = PerColumn(function(x) length(unique(x))),
    Unique.Count.Ratio = PerColumn(function(x) Formatted3(length(unique(x)) / nrow(DQA.Predictors.Numeric))),
    First.Mode.Value = PerColumn(function(x) Formatted3(FirstModes(x)[1])),
    Second.Mode.Value = PerColumn(function(x) Formatted3(SecondModes(x)[1])),
    First.Mode.Count = PerColumn(function(x) MatchCount(x, FirstModes(x)[1])),
    Second.Mode.Count = PerColumn(function(x) MatchCount(x, SecondModes(x)[1])),
    First.Second.Mode.Ratio = PerColumn(function(x) {
      Formatted3(MatchCount(x, FirstModes(x)[1]) / MatchCount(x, SecondModes(x)[1]))
    }),
    Minimum = PerColumn(function(x) Formatted3(min(x, na.rm = TRUE))),
    Mean = PerColumn(function(x) Formatted3(mean(x, na.rm = TRUE))),
    Median = PerColumn(function(x) Formatted3(median(x, na.rm = TRUE))),
    Maximum = PerColumn(function(x) Formatted3(max(x, na.rm = TRUE))),
    Skewness = PerColumn(function(x) Formatted3(skewness(x, na.rm = TRUE))),
    Kurtosis = PerColumn(function(x) Formatted3(kurtosis(x, na.rm = TRUE))),
    Percentile25th = PerColumn(function(x) Formatted3(quantile(x, probs = 0.25, na.rm = TRUE))),
    Percentile75th = PerColumn(function(x) Formatted3(quantile(x, probs = 0.75, na.rm = TRUE))),
    row.names = NULL)
})
# Left as the last expression so the summary is printed.
DQA.Predictors.Numeric.Summary
}## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 V1 numeric 122 0.897 0.020
## 2 V11 numeric 134 0.985 0.213
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 0.034 3 2 1.500
## 2 0.095 2 1 2.000
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 1 0.002 0.032 0.024 0.137 1.915 6.988 0.015 0.039
## 2 0.052 0.264 0.250 0.734 0.909 4.151 0.178 0.322
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Report the columns of the quality summary that contain missing values, or
# state that there are none.
DQA.Summary.WithMissing <- DQA.Summary[DQA.Summary$NA.Count>0,]
if (nrow(DQA.Summary.WithMissing) == 0) {
  print("No missing observations noted.")
} else {
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary.WithMissing),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  # Last expression of the branch, so the affected rows are printed.
  DQA.Summary.WithMissing
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Three low-variance screens on the quality summaries. Each prints a message
# and, when predictors are flagged, the flagged summary rows.
# Screen 1: factor predictors whose first/second mode ratio exceeds 5.
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else {
  FactorModeRatio <- as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))
  FactorHighModeRatio <- DQA.Predictors.Factor.Summary[FactorModeRatio > 5, ]
  if (nrow(FactorHighModeRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(FactorHighModeRatio),
                 " factor variable(s) with First.Second.Mode.Ratio>5."))
    FactorHighModeRatio
  } else {
    print("No low variance factor predictors due to high first-second mode ratio noted.")
  }
}
## [1] "No factor predictors noted."
# Screen 2: numeric predictors whose first/second mode ratio exceeds 5.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else {
  NumericModeRatio <- as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))
  NumericHighModeRatio <- DQA.Predictors.Numeric.Summary[NumericModeRatio > 5, ]
  if (nrow(NumericHighModeRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(NumericHighModeRatio),
                 " numeric variable(s) with First.Second.Mode.Ratio>5."))
    NumericHighModeRatio
  } else {
    print("No low variance numeric predictors due to high first-second mode ratio noted.")
  }
}
## [1] "No low variance numeric predictors due to high first-second mode ratio noted."
# Screen 3: numeric predictors whose unique-count ratio is below 0.01.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else {
  NumericUniqueRatio <- as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))
  NumericLowUniqueRatio <- DQA.Predictors.Numeric.Summary[NumericUniqueRatio < 0.01, ]
  if (nrow(NumericLowUniqueRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(NumericLowUniqueRatio),
                 " numeric variable(s) with Unique.Count.Ratio<0.01."))
    NumericLowUniqueRatio
  } else {
    print("No low variance numeric predictors due to low unique count ratio noted.")
  }
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
# Flag numeric predictors whose skewness lies outside [-3, 3] and print the
# flagged summary rows, or state that none were found.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else {
  SkewnessValues <- as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))
  # abs(s) > 3 is the same test as s > 3 | s < -3.
  HighlySkewed <- DQA.Predictors.Numeric.Summary[abs(SkewnessValues) > 3, ]
  if (nrow(HighlySkewed) > 0) {
    print(paste0("High skewness observed for ",
                 nrow(HighlySkewed),
                 " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
    HighlySkewed
  } else {
    print("No skewed numeric predictors noted.")
  }
}
## [1] "No skewed numeric predictors noted."
##################################
# Loading dataset
##################################
# Outlier screen: draw one horizontal box plot per numeric predictor, annotate
# it with the number of points boxplot.stats() reports as outliers, and
# summarise the counts per predictor.
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors, is.numeric, logical(1)), drop = FALSE]
##################################
# Identifying outliers for the numeric predictors
##################################
# Preallocated instead of grown with append() on every iteration.
OutlierCountList <- numeric(ncol(DPA.Predictors.Numeric))
# seq_len() skips the loop when there are no numeric predictors (1:0 would not).
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[[i]])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Not used in this section; kept in case later code reads it.
  OutlierIndices <- which(DPA.Predictors.Numeric[[i]] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[[i]],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}
# The closing brace and this statement used to share a line, which R cannot
# parse. Building the frame directly also avoids pushing the counts through a
# character matrix and converting them back.
OutlierCountSummary <- data.frame(
  NumericPredictors = names(DPA.Predictors.Numeric),
  OutlierCount = OutlierCountList)
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "2 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# Descriptive statistics of the numeric predictors. The recorded skim() output
# below used to follow the call as raw text, which made the script unparseable;
# it is kept here as "##" comments.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
## | Name | DPA.Predictors.Numeric |
## | Number of rows | 136 |
## | Number of columns | 2 |
## | _______________________ | |
## | Column type frequency: | |
## | numeric | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.02 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
## | V11 | 0 | 1 | 0.26 | 0.13 | 0.05 | 0.18 | 0.25 | 0.32 | 0.73 | ▅▇▂▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric)
## [1] 136 2
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Gathering descriptive statistics
##################################
# Descriptive statistics of the full train set (response and predictors). The
# recorded skim() output used to follow the call as raw text, which made the
# script unparseable; it is kept here as "##" comments.
(DPA_Skimmed <- skim(DPA))
## | Name | DPA |
## | Number of rows | 136 |
## | Number of columns | 3 |
## | _______________________ | |
## | Column type frequency: | |
## | factor | 1 |
## | numeric | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: factor
## | skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
## |---|---|---|---|---|---|
## | Class | 0 | 1 | FALSE | 2 | M: 111, R: 25 |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.02 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
## | V11 | 0 | 1 | 0.26 | 0.13 | 0.05 | 0.18 | 0.25 | 0.32 | 0.73 | ▅▇▂▁▁ |
##################################
# Identifying columns with low variance
###################################
# Near-zero-variance screen with caret::nearZeroVar(). The cut-offs are named
# so the printed message always quotes the thresholds actually applied; the
# earlier message quoted a ratio of 4 while the screen used 95/5 = 19.
DPA_FreqCut <- 95/5
DPA_UniqueCut <- 10
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = DPA_FreqCut,
                               uniqueCut = DPA_UniqueCut,
                               saveMetrics= TRUE)
(DPA_LowVariance[DPA_LowVariance$nzv,])
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
DPA_LowVarianceNames <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])
if (length(DPA_LowVarianceNames) == 0) {
  print("No low variance predictors noted.")
} else {
  DPA_LowVarianceForRemoval <- length(DPA_LowVarianceNames)
  print(paste0("Low variance observed for ",
               DPA_LowVarianceForRemoval,
               " numeric variable(s) with First.Second.Mode.Ratio>",
               DPA_FreqCut,
               " and Unique.Count.Ratio<",
               format(DPA_UniqueCut / 100, nsmall = 2),
               "."))
  print(paste0("Low variance can be resolved by removing ",
               DPA_LowVarianceForRemoval,
               " numeric variable(s)."))
  for (j in seq_along(DPA_LowVarianceNames)) {
    DPA_LowVarianceRemovedVariable <- DPA_LowVarianceNames[j]
    print(paste0("Variable ",
                 j,
                 " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }
  # Inside braces only the final expression is auto-printed, so intermediate
  # results are printed explicitly.
  print(DPA %>%
          skim() %>%
          dplyr::filter(skim_variable %in% DPA_LowVarianceNames))
  ##################################
  # Filtering out columns with low variance
  #################################
  DPA_ExcludedLowVariance <- DPA[, !names(DPA) %in% DPA_LowVarianceNames, drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance)
  print(DPA_ExcludedLowVariance_Skimmed)
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLowVariance)
}
## [1] "No low variance predictors noted."
##################################
# Loading dataset
##################################
# Pairwise Pearson correlation plot of the numeric predictors; cells whose
# p-value exceeds 0.05 are left blank.
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors, is.numeric, logical(1)), drop = FALSE]
##################################
# Visualizing pairwise correlation between predictors
##################################
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use="pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
# Count the predictor pairs whose absolute Pearson correlation exceeds 0.95
# and, when any exist, list them with corr_cross().
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       use = "pairwise.complete.obs",
                       method = "pearson")
# Each pair appears once in the upper triangle of the symmetric matrix.
(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))## [1] 0
if (DPA_HighlyCorrelatedCount > 0) {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount),
               " pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE))
} else {
  print("No highly correlated predictors noted.")
}
## [1] "No highly correlated predictors noted."
# When highly correlated pairs exist, list the predictors that
# findCorrelation() selects for removal and drop them from the data.
if (DPA_HighlyCorrelatedCount > 0) {
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)
  # findCorrelation() returns positions within the correlation matrix, i.e.
  # within DPA.Predictors.Numeric. DPA also contains "Class", so the positions
  # are translated to names before they are used to subset DPA.
  DPA_HighlyCorrelatedNames <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated]
  DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelatedNames)
  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval),
               " numeric variable(s)."))
  # seq_along() skips the loop when nothing is selected (1:0 would not).
  for (j in seq_along(DPA_HighlyCorrelatedNames)) {
    DPA_HighlyCorrelatedRemovedVariable <- DPA_HighlyCorrelatedNames[j]
    print(paste0("Variable ",
                 j,
                 " for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }
  ##################################
  # Filtering out columns with high correlation
  #################################
  # Removal by name: negative positional indexing dropped the wrong column
  # and, with an empty index, every column.
  DPA_ExcludedHighCorrelation <- DPA[, !names(DPA) %in% DPA_HighlyCorrelatedNames, drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  # Printed explicitly: inside braces only the final expression auto-prints.
  DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation)
  print(DPA_ExcludedHighCorrelation_Skimmed)
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedHighCorrelation)
}
##################################
# Loading dataset
##################################
# Linear-dependency screen of the numeric predictors with findLinearCombos().
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors, is.numeric, logical(1)), drop = FALSE]
##################################
# Identifying the linearly dependent variables
##################################
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))## [1] 0
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount),
               " subset(s) of numeric variable(s)."))
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    # The member names are collapsed into one string so each subset yields a
    # single message rather than one message per variable.
    print(paste0("Linear dependent variable(s) for subset ",
                 i,
                 " include: ",
                 paste(DPA_LinearlyDependentSubset, collapse = ", ")))
  }
}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
# When linear dependencies exist, list the predictors that findLinearCombos()
# proposes for removal and drop them; otherwise just report the dimensions of
# the unchanged data.
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
  # $remove holds positions within DPA.Predictors.Numeric. DPA also contains
  # "Class", so the positions are translated to names before subsetting DPA.
  DPA_LinearlyDependentNames <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove]
  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependentNames)
  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval),
               " numeric variable(s)."))
  # seq_along() skips the loop when nothing is selected (1:0 would not).
  for (j in seq_along(DPA_LinearlyDependentNames)) {
    DPA_LinearlyDependentRemovedVariable <- DPA_LinearlyDependentNames[j]
    print(paste0("Variable ",
                 j,
                 " for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }
  ##################################
  # Filtering out columns with linear dependency
  #################################
  # Removal by name: negative positional indexing on DPA dropped the wrong
  # column because of the leading "Class" column.
  DPA_ExcludedLinearlyDependent <- DPA[, !names(DPA) %in% DPA_LinearlyDependentNames, drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  # Printed explicitly: inside braces only the final expression auto-prints.
  DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent)
  print(DPA_ExcludedLinearlyDependent_Skimmed)
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLinearlyDependent)
} else {
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA)
}
## [1] 136 3
##################################
# Loading dataset
##################################
# Box-Cox transformation of the numeric predictors via caret::preProcess().
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors, is.numeric, logical(1)), drop = FALSE]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# The recorded skim() output used to follow the call as raw text, which made
# the script unparseable; it is kept here as "##" comments.
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
## | Name | DPA_BoxCoxTransformed |
## | Number of rows | 136 |
## | Number of columns | 2 |
## | _______________________ | |
## | Column type frequency: | |
## | numeric | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | V1 | 0 | 1 | -3.73 | 0.78 | -6.50 | -4.17 | -3.74 | -3.24 | -1.99 | ▁▁▇▇▂ |
## | V11 | 0 | 1 | -1.07 | 0.29 | -1.73 | -1.25 | -1.06 | -0.91 | -0.29 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 136 2
##################################
# Loading dataset
##################################
# Box-Cox transformation followed by centering and scaling of the numeric
# predictors, both via caret::preProcess().
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors, is.numeric, logical(1)), drop = FALSE]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Applying a center and scale data transformation
##################################
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_BoxCoxTransformed, method = c("center","scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_BoxCoxTransformed)
##################################
# Gathering descriptive statistics
##################################
# The recorded skim() output used to follow the call as raw text, which made
# the script unparseable; it is kept here as "##" comments.
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <- skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
## | Name | DPA.Predictors.Numeric_Bo… |
## | Number of rows | 136 |
## | Number of columns | 2 |
## | _______________________ | |
## | Column type frequency: | |
## | numeric | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | V1 | 0 | 1 | 0 | 1 | -3.56 | -0.56 | -0.02 | 0.63 | 2.23 | ▁▁▇▇▂ |
## | V11 | 0 | 1 | 0 | 1 | -2.27 | -0.59 | 0.04 | 0.57 | 2.71 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 136 2
##################################
# Creating the pre-modelling
# train set
##################################
# Assemble the pre-modelling train set: the response column followed by the
# Box-Cox transformed, centered and scaled numeric predictors.
Class <- DPA$Class
PMA.Predictors.Numeric <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class,PMA.Predictors.Numeric)
PMA_PreModelling_Train <- PMA_BoxCoxTransformed_CenteredScaledTransformed
##################################
# Gathering descriptive statistics
##################################
# The recorded skim() output used to follow the call as raw text, which made
# the script unparseable; it is kept here as "##" comments.
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
## | Name | PMA_PreModelling_Train |
## | Number of rows | 136 |
## | Number of columns | 3 |
## | _______________________ | |
## | Column type frequency: | |
## | factor | 1 |
## | numeric | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: factor
## | skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
## |---|---|---|---|---|---|
## | Class | 0 | 1 | FALSE | 2 | M: 111, R: 25 |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | V1 | 0 | 1 | 0 | 1 | -3.56 | -0.56 | -0.02 | 0.63 | 2.23 | ▁▁▇▇▂ |
## | V11 | 0 | 1 | 0 | 1 | -2.27 | -0.59 | 0.04 | 0.57 | 2.71 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 136 3
##################################
# Loading dataset
##################################
# Exploratory box plots of each numeric predictor split by class.
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
# drop = FALSE keeps a data frame even when a single column is selected, so
# ncol() and names() below stay meaningful.
EDA.Predictors <- EDA[, !names(EDA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
EDA.Predictors.Numeric <- EDA.Predictors[, vapply(EDA.Predictors, is.numeric, logical(1)), drop = FALSE]
ncol(EDA.Predictors.Numeric)## [1] 2
names(EDA.Predictors.Numeric)## [1] "V1" "V11"
##################################
# Formulating the box plots
##################################
featurePlot(x = EDA.Predictors.Numeric,
            y = EDA$Class,
            plot = "box",
            scales = list(x = list(relation="free", rot = 90),
                          y = list(relation="free")),
            adjust = 1.5,
            pch = "|")
##################################
# Creating a local object
# for the train set
##################################
# Local copy of the train set for the logistic regression run, tagged with a
# constant "LR" label on every row.
PMA_PreModelling_Train_LR <- PMA_PreModelling_Train
PMA_PreModelling_Train_LR[["Label"]] <- rep_len("LR", nrow(PMA_PreModelling_Train_LR))
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train_LR[["Class"]]) ##
## M R
## 111 25
##################################
# Visualizing the imbalanced data set
##################################
# Scatter plot of V11 against V1 coloured by class, on fixed [-4, 4] axes with
# unit breaks.
ImbalancedAxisBreaks <- seq(-4, 4, by = 1)
ImbalancedAxisLimits <- c(-4, 4)
ImbalancedAxisTitle <- element_text(color = "black", size = 12, face = "bold")
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = ImbalancedAxisLimits, breaks = ImbalancedAxisBreaks) +
  scale_y_continuous(limits = ImbalancedAxisLimits, breaks = ImbalancedAxisBreaks) +
  labs(title = "Original Imbalanced Data Set") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = ImbalancedAxisTitle,
        axis.title.y = ImbalancedAxisTitle,
        legend.position = "top")
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
# Fit a binomial GLM of Class on V1 and V11 using the labelled train set.
LR_Model <- glm(Class ~ V1 + V11,
                data = PMA_PreModelling_Train_LR,
                family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_Model)##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.62998 -0.53562 -0.29014 -0.08872 2.55270
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3152 0.3771 -6.139 8.28e-10 ***
## V1 -0.7399 0.3005 -2.462 0.0138 *
## V11 -1.5607 0.3450 -4.524 6.07e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.783 on 135 degrees of freedom
## Residual deviance: 88.923 on 133 degrees of freedom
## AIC: 94.923
##
## Number of Fisher Scoring iterations: 6
# Coefficient table: estimate, coefficient name and a constant "LR" model tag.
LR_Model_Coef <- (as.data.frame(LR_Model$coefficients))
LR_Model_Coef$Coef <- rownames(LR_Model_Coef)
LR_Model_Coef$Model <- rep("LR",nrow(LR_Model_Coef))
colnames(LR_Model_Coef) <- c("Estimates","Coefficients","Model")
# print.data.frame() takes `row.names`; the earlier `rownames=FALSE` was
# silently ignored, so the row names were printed next to the identical
# Coefficients column.
print(LR_Model_Coef, row.names=FALSE)
# NOTE: the output below was recorded before that fix and still shows the
# row-name column.
## Estimates Coefficients Model
## (Intercept) -2.3152181 (Intercept) LR
## V1 -0.7399005 V1 LR
## V11 -1.5607048 V11 LR
##################################
# Computing the model predictions
##################################
# Predictions from LR_Model on the response scale. No newdata is supplied, so
# the values are for the rows the model was fitted on. The surrounding
# parentheses print the result; the "##" lines are recorded output, shown as
# row-name / value pairs.
(LR_Model_Probabilities <- predict(LR_Model,
type = c("response")))## 98 99 100 101 102 103
## 0.3637496537 0.0113931997 0.1319142547 0.0021783355 0.0037538060 0.0174606489
## 104 105 106 107 108 109
## 0.0725273351 0.0164891503 0.1569862922 0.4818797362 0.2583425943 0.1252724633
## 110 111 112 113 114 115
## 0.4112142881 0.4680556213 0.1332256210 0.0344698396 0.1348432397 0.1615191484
## 116 117 118 119 120 121
## 0.0662797277 0.2436188082 0.0780323151 0.1104267965 0.0515700587 0.1445535907
## 122 123 124 125 126 127
## 0.1731460430 0.0558634929 0.1081612509 0.0165343094 0.0369004305 0.0031146384
## 128 129 130 131 132 133
## 0.0221776030 0.0200623323 0.0028456905 0.0126580025 0.0050535349 0.0081982533
## 134 135 136 137 138 139
## 0.0025368869 0.0014777580 0.0799936459 0.0023418015 0.0008311849 0.0846443282
## 140 141 142 143 144 145
## 0.2606104200 0.1780753538 0.0394327767 0.0403610789 0.0364744270 0.7351055523
## 146 147 148 149 150 151
## 0.0306057253 0.0228736619 0.0552988212 0.0205374459 0.2190542623 0.2301893864
## 152 153 154 155 156 157
## 0.1183496315 0.1771452296 0.3898752302 0.5789364640 0.6505131954 0.1549759240
## 158 159 160 161 162 163
## 0.1418070654 0.0908007706 0.0533759775 0.0797056569 0.0996424369 0.1113892912
## 164 165 166 167 168 169
## 0.2325914484 0.0352838163 0.0868721918 0.2269151196 0.5811364344 0.6804246275
## 170 171 172 173 174 175
## 0.2377989316 0.1546431081 0.0039867863 0.0484288031 0.0350025026 0.0115543490
## 176 177 178 179 180 181
## 0.0124263395 0.0040952633 0.0623582127 0.2442298264 0.0289787190 0.0051456066
## 182 183 184 185 186 187
## 0.0192102410 0.0354944115 0.0420803608 0.0020214950 0.0046310125 0.0336330699
## 188 189 190 191 192 193
## 0.0359277781 0.1172950153 0.1419944753 0.1265297826 0.1047718509 0.2755585363
## 194 195 196 197 198 199
## 0.1390569085 0.0450510742 0.1550648867 0.2872791732 0.0821006438 0.0726099031
## 200 201 202 203 204 205
## 0.0792285656 0.0532267024 0.0299770042 0.0173915161 0.0553203581 0.0345232209
## 206 207 208 95 57 27
## 0.0333086574 0.0446830571 0.0622826685 0.8967905106 0.2407267465 0.6874636823
## 18 68 92 43 87 64
## 0.4352384578 0.4517614566 0.0687256353 0.6033006196 0.1583196380 0.7793277803
## 16 12 61 13 34 66
## 0.0727076286 0.8563207774 0.3983590501 0.7017484676 0.3096802387 0.8196244700
## 49 94 91 72 23 39
## 0.3431752805 0.0384599213 0.0861723774 0.8744789160 0.4935529921 0.5807289286
## 29 73 77 32
## 0.0739144263 0.5017946378 0.5358622581 0.3382071510
##################################
# Creating a classification index
# based from the model predictions
##################################
# Fitted values on the link scale (the linear predictor of the binomial GLM)
# for the training rows; the outer parentheses echo the assigned vector.
(LR_Model_Indices <- predict(LR_Model, type = "link"))
## 98 99 100 101 102 103
## -0.559126246 -4.463280018 -1.884138369 -6.127013509 -5.581224150 -4.030190682
## 104 105 106 107 108 109
## -2.548499795 -4.088426063 -1.680824728 -0.072512812 -1.054600830 -1.943421380
## 110 111 112 113 114 115
## -0.358947840 -0.127951795 -1.872734622 -3.332592613 -1.858797800 -1.646968043
## 116 117 118 119 120 121
## -2.645292815 -1.132940729 -2.469387136 -2.086388975 -2.911866678 -1.777973140
## 122 123 124 125 126 127
## -1.563492668 -2.827359673 -2.109662164 -4.085645174 -3.261933584 -5.768522713
## 128 129 130 131 132 133
## -3.786245149 -3.888644925 -5.859099785 -4.356726856 -5.282600962 -4.795602114
## 134 135 136 137 138 139
## -5.974277480 -6.515750374 -2.442433371 -6.054490239 -7.091826715 -2.380854599
## 140 141 142 143 144 145
## -1.042798305 -1.529441923 -3.192926600 -3.168691163 -3.273987644 1.020682662
## 146 147 148 149 150 151
## -3.455484324 -3.754629841 -2.838117074 -3.864754157 -1.271186197 -1.207242139
## 152 153 154 155 156 157
## -2.008152348 -1.535809819 -0.447836712 0.318408998 0.621295777 -1.696095344
## 158 159 160 161 162 163
## -1.800361500 -2.303896473 -2.875541209 -2.446352996 -2.201203830 -2.076628048
## 164 165 166 167 168 169
## -1.193735847 -3.308409550 -2.352437879 -1.225812824 0.327440308 0.755723902
## 170 171 172 173 174 175
## -1.164784899 -1.698638961 -5.520775067 -2.978019769 -3.316705945 -4.449071761
## 176 177 178 179 180 181
## -4.375432715 -5.493820583 -2.710472604 -1.129627633 -3.511786651 -5.264453127
## 182 183 184 185 186 187
## -3.932914599 -3.302240366 -3.125182749 -6.201894377 -5.370337994 -3.358033801
## 188 189 190 191 192 193
## -3.289655451 -2.018298780 -1.798822388 -1.931996317 -2.145293464 -0.966600884
## 194 195 196 197 198 199
## -1.823145144 -3.053861030 -1.695416182 -0.908635325 -2.414141893 -2.547272974
## 200 201 202 203 204 205
## -2.452874924 -2.878499479 -3.476889218 -4.034228249 -2.837704887 -3.330989885
## 206 207 208 95 57 27
## -3.368061909 -3.062448770 -2.711765358 2.162061490 -1.148699272 0.788288323
## 18 68 92 43 87 64
## -0.260509533 -0.193556201 -2.606431655 0.419236847 -1.670784310 1.261753296
## 16 12 61 13 34 66
## -2.545822600 1.785061852 -0.412307104 0.855637833 -0.801614636 1.513805316
## 49 94 91 72 23 39
## -0.649175855 -3.218919553 -2.361292279 1.941154440 -0.025789461 0.325766429
## 29 73 77 32
## -2.528058620 0.007178582 0.143695781 -0.671294010
# Extremes of the linear predictor (used to pick the plotting range below).
max(LR_Model_Indices)
## [1] 2.162061
min(LR_Model_Indices)
## [1] -7.091827
##################################
# Consolidating the model probabilities
# and classification index
# based from the model predictions
##################################
# Start from the modelling data and append, per row, the fitted probability,
# the linear predictor and a constant label identifying the model.
LR_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR)
LR_Model_Predictions[["LR_Prob"]] <- LR_Model_Probabilities
LR_Model_Predictions[["LR_LP"]] <- LR_Model_Indices
LR_Model_Predictions[["Class"]] <- as.factor(LR_Model_Predictions[["Class"]])
LR_Model_Predictions[["Label"]] <- rep(
  "LR",
  times = nrow(LR_Model_Predictions)
)
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
# Fitted probability against the linear predictor, one point per training
# row coloured by observed class, with a black line through the points.
ggplot(
  data = LR_Model_Predictions,
  mapping = aes(x = LR_LP, y = LR_Prob, color = Class)
) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  labs(
    x = "Sonar Object Classification Index (Logit Values)",
    y = "Estimated Rock Detection Probability",
    color = "Class",
    title = "Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression"
  ) +
  scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, by = 0.1),
    labels = scales::percent
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )
##################################
# Visualizing the imbalanced data set
##################################
# Scatter of the two predictors before any resampling, coloured by class;
# serves as the "before" picture for the random-downsampling section.
ggplot(
  data = PMA_PreModelling_Train,
  mapping = aes(V1, V11, color = Class)
) +
  geom_point(size = 5) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "Without Undersampling - Random Downsampling") +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )
##################################
# Implementing US_DOWNSAMPLE
# Visualizing the undersampled data using US_DOWNSAMPLE
##################################
# Random downsampling recipe for Class, prepared once with a fixed seed and
# reused for both the plot and the modelling data. Previously the identical
# recipe was built twice, and the second build was fused onto the end of the
# plot statement (`...legend.position="top")US_DOWNSAMPLE <- ...`), which
# does not parse.
US_DOWNSAMPLE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_downsample(Class, seed = 123456789) %>%
  prep()

# Resampled training data as a plain data frame.
PMA_PreModelling_Train_LR_US_DOWNSAMPLE <- US_DOWNSAMPLE %>%
  bake(new_data = NULL) %>%
  as.data.frame()

# Scatter of the two predictors after downsampling, coloured by class.
ggplot(PMA_PreModelling_Train_LR_US_DOWNSAMPLE, aes(V1, V11, color = Class)) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  labs(title = "With Undersampling - Random Downsample") +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )

# Echo the resampled data (recorded output follows).
PMA_PreModelling_Train_LR_US_DOWNSAMPLE
## V1 V11 Class
## 1 -0.295964244 1.50754826 M
## 2 -0.371994173 0.60103638 M
## 3 -0.789109306 -0.36302243 M
## 4 -0.256318859 -0.63813480 M
## 5 -0.779287302 0.73036007 M
## 6 0.537644499 -0.19150300 M
## 7 0.043933590 0.30731955 M
## 8 0.424204681 0.54321732 M
## 9 1.395977603 1.55085071 M
## 10 1.852613244 0.04399440 M
## 11 -0.507041623 0.38985083 M
## 12 -0.555415865 0.01776805 M
## 13 -2.093160440 0.59563328 M
## 14 0.207991857 -0.39104999 M
## 15 0.278484686 -2.26945660 M
## 16 -0.068999246 0.63930609 M
## 17 -0.935159101 0.03686239 M
## 18 0.424204681 1.89154083 M
## 19 0.625610136 0.17668535 M
## 20 0.256869245 1.19827720 M
## 21 0.695599819 -0.11827986 M
## 22 -0.499153812 0.87301303 M
## 23 0.345301912 -0.27257795 M
## 24 1.231717734 1.85842121 M
## 25 -0.957451233 0.02573746 M
## 26 -2.902302684 -1.49282840 R
## 27 -0.588710342 -0.46833442 R
## 28 -0.597170869 -1.70542147 R
## 29 -0.289270938 -1.17938825 R
## 30 0.544629613 -1.61762409 R
## 31 0.064360561 0.15607894 R
## 32 -0.168320027 -1.67226688 R
## 33 -0.316256467 -0.26298086 R
## 34 -1.638716948 -1.51501038 R
## 35 0.274190661 0.01776805 R
## 36 -0.860055136 -2.21946189 R
## 37 -0.789109306 -0.84516218 R
## 38 -1.427539411 -1.35491277 R
## 39 0.779483548 -1.33935885 R
## 40 -0.400799099 -2.26338258 R
## 41 -0.302692686 -0.92399268 R
## 42 0.827857790 0.18656255 R
## 43 -0.829167731 0.42261407 R
## 44 -2.434915803 -1.57286451 R
## 45 -1.138280881 -0.92728240 R
## 46 -0.860055136 -1.28443852 R
## 47 -1.125398710 0.66990478 R
## 48 -0.186674974 -1.39954451 R
## 49 -0.008605128 -1.57143555 R
## 50 -1.348878827 -0.41384379 R
# Tag every row of the downsampled data with its resampling label.
PMA_PreModelling_Train_LR_US_DOWNSAMPLE[["Label"]] <- rep(
  "LR_US_DOWNSAMPLE",
  times = nrow(PMA_PreModelling_Train_LR_US_DOWNSAMPLE)
)
##################################
# Verifying the class distribution
# for the undersampled data using US_DOWNSAMPLE
##################################
# Per-class row counts after downsampling.
table(PMA_PreModelling_Train_LR_US_DOWNSAMPLE[["Class"]])
##
## M R
## 25 25
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
# Same binomial GLM specification as the baseline model, fitted on the
# randomly downsampled training data.
LR_US_DOWNSAMPLE_Model <- glm(
  Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_US_DOWNSAMPLE
)
##################################
# Consolidating the model results
##################################
# Top-level call so the coefficient table and deviance figures are echoed.
summary(LR_US_DOWNSAMPLE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_DOWNSAMPLE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.44182 -0.67861 0.03524 0.59883 1.93261
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7625 0.4369 -1.745 0.080920 .
## V1 -0.7448 0.5075 -1.468 0.142179
## V11 -1.7181 0.5063 -3.393 0.000691 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.315 on 49 degrees of freedom
## Residual deviance: 43.344 on 47 degrees of freedom
## AIC: 49.344
##
## Number of Fisher Scoring iterations: 5
# Collect the fitted coefficients into a three-column table
# (Estimates, Coefficients, Model) tagged with the label "LR_US_DOWNSAMPLE".
# The coefficient names are kept both as row names and as a regular column.
LR_US_DOWNSAMPLE_Model_Coef <- as.data.frame(LR_US_DOWNSAMPLE_Model$coefficients)
LR_US_DOWNSAMPLE_Model_Coef$Coef <- rownames(LR_US_DOWNSAMPLE_Model_Coef)
LR_US_DOWNSAMPLE_Model_Coef$Model <- rep(
  "LR_US_DOWNSAMPLE",
  nrow(LR_US_DOWNSAMPLE_Model_Coef)
)
colnames(LR_US_DOWNSAMPLE_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() spells the argument `row.names`; the previous
# `rownames=FALSE` was swallowed by `...` and the row names were still shown,
# duplicating the Coefficients column.
print(LR_US_DOWNSAMPLE_Model_Coef, row.names = FALSE)
## Estimates Coefficients Model
## -0.7625171 (Intercept) LR_US_DOWNSAMPLE
## -0.7448227 V1 LR_US_DOWNSAMPLE
## -1.7180819 V11 LR_US_DOWNSAMPLE
##################################
# Computing the model predictions
##################################
# Fitted values on the response (probability) scale for the downsampled
# training rows; the outer parentheses echo the assigned vector.
(LR_US_DOWNSAMPLE_Model_Probabilities <- predict(
  LR_US_DOWNSAMPLE_Model,
  type = "response"
))
## 1 2 3 4 5 6
## 0.041799701 0.179744842 0.610385203 0.628260800 0.192025135 0.302810692
## 7 8 9 10 11 12
## 0.210279740 0.117973061 0.011354052 0.098148202 0.258331895 0.406281809
## 13 14 15 16 17 18
## 0.443534294 0.438912131 0.949269888 0.140697941 0.467714486 0.013018286
## 19 20 21 22 23 24
## 0.177694571 0.046860883 0.253998128 0.131171064 0.365540908 0.007593438
## 25 26 27 28 29 30
## 0.476615880 0.981366508 0.617890633 0.931650474 0.814455352 0.833555873
## 31 32 33 34 35 36
## 0.253769009 0.903431085 0.481223964 0.955252856 0.269478992 0.975667022
## 37 38 39 40 41 42
## 0.781987655 0.932679562 0.722728010 0.968467123 0.740857535 0.154510435
## 43 44 45 46 47 48
## 0.295042156 0.977098243 0.842698840 0.889421156 0.254406742 0.855827057
## 49 50
## 0.874763232 0.721753452
##################################
# Creating a classification index
# based from the model predictions
##################################
# Fitted values on the link scale (the linear predictor of the binomial GLM)
# for the downsampled training rows; the parentheses echo the result.
(LR_US_DOWNSAMPLE_Model_Indices <- predict(
  LR_US_DOWNSAMPLE_Model,
  type = "link"
))
## 1 2 3 4 5 6
## -3.13216766 -1.51807716 0.44893169 0.52476284 -1.43690467 -0.83394915
## 7 8 9 10 11 12
## -1.32324004 -2.01176629 -4.46676157 -2.21797160 -1.05465667 -0.37935775
## 13 14 15 16 17 18
## -0.22683042 -0.24557829 2.92917349 -1.80950517 -0.12932199 -4.32829655
## 19 20 21 22 23 24
## -1.53204571 -3.01257759 -1.07740121 -1.89064395 -0.55139462 -4.87284841
## 25 26 27 28 29 30
## -0.09360477 3.96398538 0.48060460 2.61232309 1.47922407 1.61104105
## 31 32 33 34 35 36
## -1.07861075 2.23594294 -0.07513948 3.06094845 -0.99726756 3.69128882
## 37 38 39 40 41 42
## 1.27728727 2.62859781 0.95803402 3.42468383 1.05043038 -1.69965419
## 43 44 45 46 47 48
## -0.87101976 3.75337359 1.67844746 2.08484208 -1.07524589 1.78105475
## 49 50
## 1.94374717 0.95317602
# Extremes of the linear predictor (used to pick the plotting range below).
max(LR_US_DOWNSAMPLE_Model_Indices)
## [1] 3.963985
min(LR_US_DOWNSAMPLE_Model_Indices)
## [1] -4.872848
##################################
# Consolidating the model probabilities
# and classification index
# based from the model predictions
##################################
# Start from the downsampled data and append, per row, the fitted
# probability, the linear predictor and a constant model label.
LR_US_DOWNSAMPLE_Model_Predictions <- as.data.frame(
  PMA_PreModelling_Train_LR_US_DOWNSAMPLE
)
LR_US_DOWNSAMPLE_Model_Predictions[["LR_US_DOWNSAMPLE_Prob"]] <-
  LR_US_DOWNSAMPLE_Model_Probabilities
LR_US_DOWNSAMPLE_Model_Predictions[["LR_US_DOWNSAMPLE_LP"]] <-
  LR_US_DOWNSAMPLE_Model_Indices
LR_US_DOWNSAMPLE_Model_Predictions[["Class"]] <- as.factor(
  LR_US_DOWNSAMPLE_Model_Predictions[["Class"]]
)
LR_US_DOWNSAMPLE_Model_Predictions[["Label"]] <- rep(
  "LR_US_DOWNSAMPLE",
  times = nrow(LR_US_DOWNSAMPLE_Model_Predictions)
)
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
# Fitted probability against the linear predictor for the downsampled model,
# one point per row coloured by observed class, with a black line overlay.
ggplot(
  data = LR_US_DOWNSAMPLE_Model_Predictions,
  mapping = aes(
    x = LR_US_DOWNSAMPLE_LP,
    y = LR_US_DOWNSAMPLE_Prob,
    color = Class
  )
) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  labs(
    x = "Sonar Object Classification Index (Logit Values)",
    y = "Estimated Rock Detection Probability",
    color = "Class",
    title = "Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_DOWNSAMPLE)"
  ) +
  scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, by = 0.1),
    labels = scales::percent
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )
##################################
# Visualizing the imbalanced data set
##################################
# Scatter of the two predictors before any resampling, coloured by class;
# serves as the "before" picture for the random-upsampling section.
ggplot(
  data = PMA_PreModelling_Train,
  mapping = aes(V1, V11, color = Class)
) +
  geom_point(size = 5) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "Without Oversampling - Random Upsampling") +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )
##################################
# Implementing OS_UPSAMPLE
# Visualizing the oversampled data using OS_UPSAMPLE
##################################
# Random upsampling recipe for Class, prepared once with a fixed seed and
# reused for both the plot and the modelling data. Previously the identical
# recipe was built twice, and the second build was fused onto the end of the
# plot statement (`...legend.position="top")OS_UPSAMPLE <- ...`), which
# does not parse.
OS_UPSAMPLE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_upsample(Class, seed = 123456789) %>%
  prep()

# Resampled training data as a plain data frame.
PMA_PreModelling_Train_LR_OS_UPSAMPLE <- OS_UPSAMPLE %>%
  bake(new_data = NULL) %>%
  as.data.frame()

# Scatter of the two predictors after upsampling, coloured by class.
# The title previously read "With Undersampling", which mislabelled this
# oversampled data set.
ggplot(PMA_PreModelling_Train_LR_OS_UPSAMPLE, aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Oversampling - Random Upsample") +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )

# Echo the resampled data (recorded output follows).
PMA_PreModelling_Train_LR_OS_UPSAMPLE
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.408807916 -1.63079048 M
## 11 0.738227771 -1.15770286 M
## 12 1.169078303 -0.79246144 M
## 13 0.118911997 -1.30982705 M
## 14 -0.174409184 -1.31877657 M
## 15 1.012209983 -0.76338471 M
## 16 0.813818599 0.26605270 M
## 17 0.207991857 -0.39104999 M
## 18 -0.957451233 0.02573746 M
## 19 0.695599819 -0.11827986 M
## 20 -1.204708453 -0.18639848 M
## 21 -0.068999246 0.13149291 M
## 22 0.527094931 -0.39650480 M
## 23 0.104263091 0.33286517 M
## 24 0.465616266 -0.56497212 M
## 25 -0.507041623 -0.24127887 M
## 26 0.043933590 0.30731955 M
## 27 0.147716923 -0.20173692 M
## 28 0.612463710 0.84401925 M
## 29 -0.068999246 0.63930609 M
## 30 1.395977603 1.55085071 M
## 31 -0.180527407 1.02812486 M
## 32 0.565359423 0.74012531 M
## 33 2.230423635 1.21329169 M
## 34 0.782380194 0.93715691 M
## 35 2.005116912 0.95072168 M
## 36 1.784287809 0.74337479 M
## 37 1.523834205 1.62207084 M
## 38 1.928176420 1.77732114 M
## 39 -1.204708453 0.65264083 M
## 40 1.934080462 1.47897609 M
## 41 0.744203377 2.70773324 M
## 42 1.424344244 -0.63319889 M
## 43 -0.491314245 -0.58236264 M
## 44 0.689392711 -0.83030322 M
## 45 1.381555338 -0.09258957 M
## 46 1.002499594 0.07158504 M
## 47 0.977896818 0.15071601 M
## 48 0.278484686 -2.26945660 M
## 49 1.406688795 0.06372536 M
## 50 1.852613244 0.04399440 M
## 51 1.281675939 -0.27257795 M
## 52 1.390588252 0.33359231 M
## 53 -0.192852168 -0.57752146 M
## 54 -0.180527407 -0.62433567 M
## 55 -0.052243905 -0.17198030 M
## 56 -0.779287302 -0.12994977 M
## 57 -0.041194136 -1.17696945 M
## 58 -0.924156756 -1.24933498 M
## 59 -0.168320027 -1.80173334 M
## 60 -2.093160440 0.59563328 M
## 61 -0.230553865 -0.22058604 M
## 62 -1.038676203 0.48516249 M
## 63 -0.030238811 0.37335493 M
## 64 0.089444831 0.04161877 M
## 65 0.303951059 -0.21715050 M
## 66 -0.132380328 -0.09011428 M
## 67 -1.546463816 0.01457488 M
## 68 -0.499153812 0.87301303 M
## 69 -0.108968439 0.07550792 M
## 70 0.686277848 -1.02337267 M
## 71 -0.721885235 -1.35101476 M
## 72 -3.557061230 -0.28132844 M
## 73 -0.789109306 -0.36302243 M
## 74 -0.750264961 -0.03937819 M
## 75 -0.379134945 2.23365699 M
## 76 -0.371994173 0.60103638 M
## 77 0.401039618 0.45156422 M
## 78 -0.295964244 1.50754826 M
## 79 0.256869245 1.19827720 M
## 80 1.243886483 1.44694263 M
## 81 -0.230553865 0.36255507 M
## 82 -0.256318859 -0.63813480 M
## 83 0.632133128 0.46700244 M
## 84 0.324793230 1.73570326 M
## 85 0.723165726 0.69367752 M
## 86 -1.191144673 1.19711994 M
## 87 -1.177722925 1.07730973 M
## 88 0.142960831 2.42255907 M
## 89 0.443194144 1.74741590 M
## 90 -0.180527407 0.75375436 M
## 91 0.544629613 0.36615870 M
## 92 -1.274767709 0.41409622 M
## 93 -0.539087424 -0.07530230 M
## 94 -0.555415865 0.01776805 M
## 95 0.345301912 -0.27257795 M
## 96 -1.868589923 0.02175517 M
## 97 -0.217863016 -0.21200423 M
## 98 0.625610136 0.17668535 M
## 99 -0.799007156 -0.01833539 M
## 100 -2.013850697 0.05347991 M
## 101 0.537644499 -0.19150300 M
## 102 -0.013979414 0.15531332 M
## 103 -0.935159101 0.53154288 M
## 104 -0.779287302 0.73036007 M
## 105 0.424204681 0.54321732 M
## 106 0.157176488 1.02691760 M
## 107 -0.323092564 0.48794823 M
## 108 0.377448180 0.47190071 M
## 109 0.992715078 0.20396774 M
## 110 0.295518362 0.33867807 M
## 111 0.099342681 0.20698569 M
## 112 -2.434915803 -1.57286451 R
## 113 -0.829167731 0.42261407 R
## 114 -0.829167731 0.42261407 R
## 115 -2.434915803 -1.57286451 R
## 116 -1.348878827 -0.41384379 R
## 117 -0.008605128 -1.57143555 R
## 118 -0.168320027 -1.67226688 R
## 119 -1.348878827 -0.41384379 R
## 120 -0.316256467 -0.26298086 R
## 121 -1.427539411 -1.35491277 R
## 122 -0.597170869 -1.70542147 R
## 123 -0.186674974 -1.39954451 R
## 124 -0.400799099 -2.26338258 R
## 125 0.544629613 -1.61762409 R
## 126 -0.789109306 -0.84516218 R
## 127 -0.316256467 -0.26298086 R
## 128 -0.597170869 -1.70542147 R
## 129 -0.588710342 -0.46833442 R
## 130 -0.186674974 -1.39954451 R
## 131 -2.434915803 -1.57286451 R
## 132 0.544629613 -1.61762409 R
## 133 0.827857790 0.18656255 R
## 134 -0.186674974 -1.39954451 R
## 135 -0.588710342 -0.46833442 R
## 136 0.544629613 -1.61762409 R
## 137 -0.186674974 -1.39954451 R
## 138 0.064360561 0.15607894 R
## 139 -0.789109306 -0.84516218 R
## 140 -0.302692686 -0.92399268 R
## 141 -0.860055136 -1.28443852 R
## 142 -0.289270938 -1.17938825 R
## 143 -0.302692686 -0.92399268 R
## 144 0.544629613 -1.61762409 R
## 145 -0.860055136 -1.28443852 R
## 146 -0.186674974 -1.39954451 R
## 147 0.064360561 0.15607894 R
## 148 -0.008605128 -1.57143555 R
## 149 -1.427539411 -1.35491277 R
## 150 -0.588710342 -0.46833442 R
## 151 0.779483548 -1.33935885 R
## 152 -0.860055136 -1.28443852 R
## 153 0.827857790 0.18656255 R
## 154 -0.186674974 -1.39954451 R
## 155 -0.186674974 -1.39954451 R
## 156 -0.588710342 -0.46833442 R
## 157 -2.434915803 -1.57286451 R
## 158 0.779483548 -1.33935885 R
## 159 -0.168320027 -1.67226688 R
## 160 -0.860055136 -2.21946189 R
## 161 -0.588710342 -0.46833442 R
## 162 -2.434915803 -1.57286451 R
## 163 -1.138280881 -0.92728240 R
## 164 -0.008605128 -1.57143555 R
## 165 -0.316256467 -0.26298086 R
## 166 -0.860055136 -2.21946189 R
## 167 0.544629613 -1.61762409 R
## 168 -0.289270938 -1.17938825 R
## 169 -1.638716948 -1.51501038 R
## 170 -1.638716948 -1.51501038 R
## 171 -0.829167731 0.42261407 R
## 172 -0.302692686 -0.92399268 R
## 173 -2.902302684 -1.49282840 R
## 174 -0.588710342 -0.46833442 R
## 175 -0.588710342 -0.46833442 R
## 176 0.064360561 0.15607894 R
## 177 -0.316256467 -0.26298086 R
## 178 -0.186674974 -1.39954451 R
## 179 -1.348878827 -0.41384379 R
## 180 -2.434915803 -1.57286451 R
## 181 -1.138280881 -0.92728240 R
## 182 -0.302692686 -0.92399268 R
## 183 -0.400799099 -2.26338258 R
## 184 -1.638716948 -1.51501038 R
## 185 -1.125398710 0.66990478 R
## 186 -0.860055136 -1.28443852 R
## 187 -0.789109306 -0.84516218 R
## 188 -0.588710342 -0.46833442 R
## 189 -1.138280881 -0.92728240 R
## 190 -0.302692686 -0.92399268 R
## 191 0.779483548 -1.33935885 R
## 192 0.779483548 -1.33935885 R
## 193 -0.860055136 -1.28443852 R
## 194 -1.638716948 -1.51501038 R
## 195 -0.597170869 -1.70542147 R
## 196 0.827857790 0.18656255 R
## 197 -0.302692686 -0.92399268 R
## 198 -1.138280881 -0.92728240 R
## 199 -0.400799099 -2.26338258 R
## 200 -1.348878827 -0.41384379 R
## 201 -0.302692686 -0.92399268 R
## 202 -0.860055136 -2.21946189 R
## 203 -1.348878827 -0.41384379 R
## 204 -0.789109306 -0.84516218 R
## 205 -0.829167731 0.42261407 R
## 206 -0.168320027 -1.67226688 R
## 207 -0.860055136 -2.21946189 R
## 208 -0.400799099 -2.26338258 R
## 209 -1.427539411 -1.35491277 R
## 210 -0.860055136 -2.21946189 R
## 211 -0.597170869 -1.70542147 R
## 212 -0.400799099 -2.26338258 R
## 213 -0.860055136 -1.28443852 R
## 214 -0.302692686 -0.92399268 R
## 215 -1.427539411 -1.35491277 R
## 216 -1.348878827 -0.41384379 R
## 217 0.064360561 0.15607894 R
## 218 -0.168320027 -1.67226688 R
## 219 -0.168320027 -1.67226688 R
## 220 -0.860055136 -1.28443852 R
## 221 -1.638716948 -1.51501038 R
## 222 -0.588710342 -0.46833442 R
# Tag every row of the upsampled data with its resampling label.
PMA_PreModelling_Train_LR_OS_UPSAMPLE[["Label"]] <- rep(
  "LR_OS_UPSAMPLE",
  times = nrow(PMA_PreModelling_Train_LR_OS_UPSAMPLE)
)
##################################
# Verifying the class distribution
# for the oversampled data using OS_UPSAMPLE
##################################
# Per-class row counts after upsampling.
table(PMA_PreModelling_Train_LR_OS_UPSAMPLE[["Class"]])
##
## M R
## 111 111
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
# Same binomial GLM specification as the baseline model, fitted on the
# randomly upsampled training data.
LR_OS_UPSAMPLE_Model <- glm(
  Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_OS_UPSAMPLE
)
##################################
# Consolidating the model results
##################################
# Top-level call so the coefficient table and deviance figures are echoed.
summary(LR_OS_UPSAMPLE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_UPSAMPLE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.41124 -0.60898 0.06056 0.60185 2.17631
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1647 0.2470 -4.715 2.42e-06 ***
## V1 -0.9110 0.2482 -3.670 0.000243 ***
## V11 -1.8812 0.2634 -7.142 9.17e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 181.78 on 219 degrees of freedom
## AIC: 187.78
##
## Number of Fisher Scoring iterations: 5
# Collect the fitted coefficients into a three-column table
# (Estimates, Coefficients, Model) tagged with the label "LR_OS_UPSAMPLE".
# The coefficient names are kept both as row names and as a regular column.
LR_OS_UPSAMPLE_Model_Coef <- as.data.frame(LR_OS_UPSAMPLE_Model$coefficients)
LR_OS_UPSAMPLE_Model_Coef$Coef <- rownames(LR_OS_UPSAMPLE_Model_Coef)
LR_OS_UPSAMPLE_Model_Coef$Model <- rep(
  "LR_OS_UPSAMPLE",
  nrow(LR_OS_UPSAMPLE_Model_Coef)
)
colnames(LR_OS_UPSAMPLE_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() spells the argument `row.names`; the previous
# `rownames=FALSE` was swallowed by `...` and the row names were still shown,
# duplicating the Coefficients column.
print(LR_OS_UPSAMPLE_Model_Coef, row.names = FALSE)
## Estimates Coefficients Model
## -1.1646747 (Intercept) LR_OS_UPSAMPLE
## -0.9110338 V1 LR_OS_UPSAMPLE
## -1.8811681 V11 LR_OS_UPSAMPLE
##################################
# Computing the model predictions
##################################
# Fitted values on the response (probability) scale for the upsampled
# training rows; the outer parentheses echo the assigned vector.
(LR_OS_UPSAMPLE_Model_Probabilities <- predict(
  LR_OS_UPSAMPLE_Model,
  type = "response"
))
## 1 2 3 4 5 6
## 0.7179750734 0.0219760571 0.3450992033 0.0030705786 0.0060031731 0.0371925877
## 7 8 9 10 11 12
## 0.1921472883 0.0352984771 0.4056307613 0.8221055779 0.5843383511 0.3232247505
## 13 14 15 16 17 18
## 0.7669193985 0.8138250288 0.3428144171 0.0826722516 0.3501155120 0.4156101313
## 19 20 21 22 23 24
## 0.1713790099 0.5704046636 0.2060053981 0.2892593757 0.1317198352 0.3714364633
## 25 26 27 28 29 30
## 0.4381049807 0.1439553447 0.2850090965 0.0352163999 0.0907545671 0.0047075528
## 31 32 33 34 35 36
## 0.0504845805 0.0442755776 0.0041558586 0.0255708393 0.0083271556 0.0149403855
## 37 38 39 40 41 42
## 0.0036683904 0.0018985803 0.2150293746 0.0033053895 0.0009708468 0.2190609887
## 43 44 45 46 47 48
## 0.5934995135 0.4425560636 0.0954235210 0.0986203238 0.0879365600 0.9453623871
## 49 50 51 52 53 54
## 0.0713526975 0.0504392980 0.1394866952 0.0448266150 0.5243395342 0.5434581342
## 55 56 57 58 59 60
## 0.3114058838 0.4476277500 0.7478035602 0.8836514796 0.9151368708 0.4065560302
## 61 62 63 64 65 66
## 0.3682636851 0.2439622644 0.1371140479 0.2100782444 0.2624859141 0.2943080924
## 67 68 69 70 71 72
## 0.5539858259 0.0868900350 0.2301536474 0.5337583288 0.8843692868 0.9311925798
## 73 74 75 76 77 78
## 0.5590079462 0.3996122680 0.0065536738 0.1238543705 0.0847490662 0.0234079574
## 79 80 81 82 83 84
## 0.0252623769 0.0065622381 0.1629201398 0.5669159737 0.0679205488 0.0087864263
## 85 86 87 88 89 90
## 0.0419497741 0.0885487004 0.1073279679 0.0028654573 0.0077243548 0.0817993402
## 91 92 93 94 95 96
## 0.0870930602 0.3138237976 0.3700758345 0.3335668084 0.2755803841 0.6216949898
## 97 98 99 100 101 102
## 0.3618419434 0.1123457119 0.4007694305 0.6386262692 0.2151364084 0.1909097870
## 103 104 105 106 107 108
## 0.2120463101 0.1383994535 0.0708949509 0.0376997727 0.1432887529 0.0834577765
## 109 110 111 112 113 114
## 0.0792362717 0.1119463627 0.1618463380 0.9822321769 0.2307141816 0.2307141816
## 115 116 117 118 119 120
## 0.9822321769 0.6990355243 0.8580636277 0.8942101026 0.6990355243 0.4056824362
## 121 122 123 124 125 126
## 0.9361166156 0.9300561845 0.8372859227 0.9694768165 0.7993396439 0.7584357774
## 127 128 129 130 131 132
## 0.4056824362 0.9300561845 0.5628350830 0.8372859227 0.9822321769 0.7993396439
## 133 134 135 136 137 138
## 0.0936520778 0.8372859227 0.5628350830 0.7993396439 0.8372859227 0.1799155074
## 139 140 141 142 143 144
## 0.7584357774 0.7004148673 0.8844343440 0.7887635614 0.7004148673 0.7993396439
## 145 146 147 148 149 150
## 0.8844343440 0.8372859227 0.1799155074 0.8580636277 0.9361166156 0.5628350830
## 151 152 153 154 155 156
## 0.6558261080 0.8844343440 0.0936520778 0.8372859227 0.8372859227 0.5628350830
## 157 158 159 160 161 162
## 0.9822321769 0.6558261080 0.8942101026 0.9779909273 0.5628350830 0.9822321769
## 163 164 165 166 167 168
## 0.8343411495 0.8580636277 0.4056824362 0.9779909273 0.7993396439 0.7887635614
## 169 170 171 172 173 174
## 0.9600071925 0.9600071925 0.2307141816 0.7004148673 0.9864494168 0.5628350830
## 175 176 177 178 179 180
## 0.5628350830 0.1799155074 0.4056824362 0.8372859227 0.6990355243 0.9822321769
## 181 182 183 184 185 186
## 0.8343411495 0.7004148673 0.9694768165 0.9600071925 0.1978792907 0.8844343440
## 187 188 189 190 191 192
## 0.7584357774 0.5628350830 0.8343411495 0.7004148673 0.6558261080 0.6558261080
## 193 194 195 196 197 198
## 0.8844343440 0.9600071925 0.9300561845 0.0936520778 0.7004148673 0.8343411495
## 199 200 201 202 203 204
## 0.9694768165 0.6990355243 0.7004148673 0.9779909273 0.6990355243 0.7584357774
## 205 206 207 208 209 210
## 0.2307141816 0.8942101026 0.9779909273 0.9694768165 0.9361166156 0.9779909273
## 211 212 213 214 215 216
## 0.9300561845 0.9694768165 0.8844343440 0.7004148673 0.9361166156 0.6990355243
## 217 218 219 220 221 222
## 0.1799155074 0.8942101026 0.8942101026 0.8844343440 0.9600071925 0.5628350830
##################################
# Creating a classification index
# based from the model predictions
##################################
# Fitted values on the link scale (the linear predictor of the binomial GLM)
# for the upsampled training rows; the parentheses echo the result.
(LR_OS_UPSAMPLE_Model_Indices <- predict(
  LR_OS_UPSAMPLE_Model,
  type = "link"
))
## 1 2 3 4 5 6
## 0.93443939 -3.79558061 -0.64065185 -5.78281396 -5.10944583 -3.25374392
## 7 8 9 10 11 12
## -1.43611755 -3.30797893 -0.38205745 1.53067859 0.34060860 -0.73899133
## 13 14 15 16 17 18
## 1.19099739 1.47505845 -0.65077721 -2.40658081 -0.61853150 -0.34082071
## 19 20 21 22 23 24
## -1.57588533 0.28350236 -1.34917429 -0.89898378 -1.88583723 -0.52605929
## 25 26 27 28 29 30
## -0.24885647 -1.78281940 -0.91974872 -3.31039195 -2.30445626 -5.35386842
## 31 32 33 34 35 36
## -2.93428382 -3.07203638 -5.47907171 -3.64039921 -4.77987133 -4.18863418
## 37 38 39 40 41 42
## -5.60432715 -6.26474851 -1.29487165 -5.70889009 -6.93637061 -1.27114688
## 43 44 45 46 47 48
## 0.37845127 -0.23079478 -2.24914176 -2.21264920 -2.33909390 2.85084581
## 49 50 51 52 53 54
## -2.56609386 -2.93522886 -1.81955986 -3.05909083 0.09743515 0.17427227
## 55 56 57 58 59 60
## -0.79355485 -0.21026021 1.08693202 2.02747256 2.37803393 -0.37822105
## 61 62 63 64 65 66
## -0.53967287 -1.13107773 -1.83946948 -1.32445384 -1.03308777 -0.87455160
## 67 68 69 70 71 72
## 0.21678837 -2.35221296 -1.20744383 0.13523906 2.03447311 2.60515452
## 73 74 75 76 77 78
## 0.23713683 -0.40708092 -5.02115426 -1.95642588 -2.37950354 -3.73099298
## 79 80 81 82 83 84
## -3.65285212 -5.01983969 -1.63665934 0.26927933 -2.61907944 -4.72572196
## 85 86 87 88 89 90
## -3.12842716 -2.33148548 -2.11832998 -5.85215776 -4.85562264 -2.41814677
## 91 92 93 94 95 96
## -2.34965674 -0.78230278 -0.53189150 -0.69209673 -0.96649144 0.49674883
## 97 98 99 100 101 102
## -0.56737849 -2.06700152 -0.40226017 0.56940673 -1.29423765 -1.44410943
## 103 104 105 106 107 108
## -1.31263462 -1.82864767 -2.57302259 -3.23967242 -1.78823908 -2.39626730
## 109 110 111 112 113 114
## -2.45276930 -2.07101229 -1.64455410 4.01243858 -1.20428294 -1.20428294
## 115 116 117 118 119 120
## 4.01243858 0.84270933 1.79929938 2.13448574 0.84270933 -0.38184312
## 121 122 123 124 125 126
## 2.68468076 2.58755271 1.63817108 3.45827005 1.38217223 1.14412276
## 127 128 129 130 131 132
## -0.38184312 2.58755271 0.25267615 1.63817108 4.01243858 1.38217223
## 133 134 135 136 137 138
## -2.26983664 1.63817108 0.25267615 1.38217223 1.63817108 -1.51692004
## 139 140 141 142 143 144
## 1.14412276 0.84927420 2.03510946 1.31748854 0.84927420 1.38217223
## 145 146 147 148 149 150
## 2.03510946 1.63817108 -1.51692004 1.79929938 2.68468076 0.25267615
## 151 152 153 154 155 156
## 0.64474864 2.03510946 -2.26983664 1.63817108 1.63817108 0.25267615
## 157 158 159 160 161 162
## 4.01243858 0.64474864 2.13448574 3.79404563 0.25267615 4.01243858
## 163 164 165 166 167 168
## 1.61671181 1.79929938 -0.38184312 3.79404563 1.38217223 1.31748854
## 169 170 171 172 173 174
## 3.17824115 3.17824115 -1.20428294 0.84927420 4.28768246 0.25267615
## 175 176 177 178 179 180
## 0.25267615 -1.51692004 -0.38184312 1.63817108 0.84270933 4.01243858
## 181 182 183 184 185 186
## 1.61671181 0.84927420 3.45827005 3.17824115 -1.39960190 2.03510946
## 187 188 189 190 191 192
## 1.14412276 0.25267615 1.61671181 0.84927420 0.64474864 0.64474864
## 193 194 195 196 197 198
## 2.03510946 3.17824115 2.58755271 -2.26983664 0.84927420 1.61671181
## 199 200 201 202 203 204
## 3.45827005 0.84270933 0.84927420 3.79404563 0.84270933 1.14412276
## 205 206 207 208 209 210
## -1.20428294 2.13448574 3.79404563 3.45827005 2.68468076 3.79404563
## 211 212 213 214 215 216
## 2.58755271 3.45827005 2.03510946 0.84927420 2.68468076 0.84270933
## 217 218 219 220 221 222
## -1.51692004 2.13448574 2.13448574 2.03510946 3.17824115 0.25267615
# Reporting the extremes of the classification index
max(LR_OS_UPSAMPLE_Model_Indices)
## [1] 4.287682
min(LR_OS_UPSAMPLE_Model_Indices)
## [1] -6.936371
##################################
# Gathering the modelling data, the predicted
# probabilities and the classification index
# into a single data frame
##################################
LR_OS_UPSAMPLE_Model_Predictions <-
  as.data.frame(PMA_PreModelling_Train_LR_OS_UPSAMPLE)
LR_OS_UPSAMPLE_Model_Predictions$LR_OS_UPSAMPLE_Prob <-
  LR_OS_UPSAMPLE_Model_Probabilities
LR_OS_UPSAMPLE_Model_Predictions$LR_OS_UPSAMPLE_LP <-
  LR_OS_UPSAMPLE_Model_Indices
LR_OS_UPSAMPLE_Model_Predictions$Class <-
  as.factor(LR_OS_UPSAMPLE_Model_Predictions$Class)
# Tagging every row with the model identifier
LR_OS_UPSAMPLE_Model_Predictions$Label <-
  rep_len("LR_OS_UPSAMPLE", nrow(LR_OS_UPSAMPLE_Model_Predictions))
##################################
# Plotting the predicted probability
# against the classification index
# from the consolidated predictions
##################################
ggplot(LR_OS_UPSAMPLE_Model_Predictions,
       aes(x = LR_OS_UPSAMPLE_LP,
           y = LR_OS_UPSAMPLE_Prob,
           colour = Class)) +
  # Semi-transparent points, one colour per class level
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(colour = "black") +
  scale_x_continuous(breaks = seq(-10, 5, by = 1), limits = c(-10, 5)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.1),
                     limits = c(0, 1),
                     labels = scales::percent) +
  labs(x = "Sonar Object Classification Index (Logit Values)",
       y = "Estimated Rock Detection Probability",
       colour = "Class",
       title = "Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_UPSAMPLE)") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(colour = "black", size = 14, face = "bold", hjust = 0.5),
        axis.title.x = element_text(colour = "black", size = 12, face = "bold"),
        axis.title.y = element_text(colour = "black", size = 12, face = "bold"))
##################################
# Scatter plot of the imbalanced data set
# (reference view for the Near Miss comparison)
##################################
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, colour = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  scale_y_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  ggtitle("Without Undersampling - Near Miss Algorithm") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(colour = "black", size = 14, face = "bold", hjust = 0.5),
        axis.title.x = element_text(colour = "black", size = 12, face = "bold"),
        axis.title.y = element_text(colour = "black", size = 12, face = "bold"))
##################################
# Implementing US_NEARMISS
# Visualizing the undersampled data using US_NEARMISS
##################################
# The Near Miss recipe is prepared once and reused for both the plot and the
# modelling data; the fixed seed keeps the undersampling reproducible.
US_NEARMISS <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_nearmiss(Class, seed = 123456789) %>%
  prep()
# Plotting the undersampled training set stored in the prepared recipe
US_NEARMISS %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Undersampling - Near Miss Algorithm") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
# Extracting the undersampled training set as a plain data frame
# (this statement was previously fused onto the end of the plot call)
PMA_PreModelling_Train_LR_US_NEARMISS <- US_NEARMISS %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_US_NEARMISS <- as.data.frame(PMA_PreModelling_Train_LR_US_NEARMISS))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 1.143139503 0.55690423 M
## 3 -0.935159101 0.03686239 M
## 4 1.169078303 -0.79246144 M
## 5 -1.204708453 -0.18639848 M
## 6 -0.068999246 0.13149291 M
## 7 0.565359423 0.74012531 M
## 8 2.230423635 1.21329169 M
## 9 0.782380194 0.93715691 M
## 10 1.784287809 0.74337479 M
## 11 1.934080462 1.47897609 M
## 12 0.744203377 2.70773324 M
## 13 1.002499594 0.07158504 M
## 14 1.281675939 -0.27257795 M
## 15 1.390588252 0.33359231 M
## 16 -0.180527407 -0.62433567 M
## 17 -0.168320027 -1.80173334 M
## 18 -0.132380328 -0.09011428 M
## 19 -0.789109306 -0.36302243 M
## 20 0.401039618 0.45156422 M
## 21 1.243886483 1.44694263 M
## 22 0.142960831 2.42255907 M
## 23 0.625610136 0.17668535 M
## 24 0.537644499 -0.19150300 M
## 25 0.157176488 1.02691760 M
## 26 -2.902302684 -1.49282840 R
## 27 -0.588710342 -0.46833442 R
## 28 -0.597170869 -1.70542147 R
## 29 -0.289270938 -1.17938825 R
## 30 0.544629613 -1.61762409 R
## 31 0.064360561 0.15607894 R
## 32 -0.168320027 -1.67226688 R
## 33 -0.316256467 -0.26298086 R
## 34 -1.638716948 -1.51501038 R
## 35 0.274190661 0.01776805 R
## 36 -0.860055136 -2.21946189 R
## 37 -0.789109306 -0.84516218 R
## 38 -1.427539411 -1.35491277 R
## 39 0.779483548 -1.33935885 R
## 40 -0.400799099 -2.26338258 R
## 41 -0.302692686 -0.92399268 R
## 42 0.827857790 0.18656255 R
## 43 -0.829167731 0.42261407 R
## 44 -2.434915803 -1.57286451 R
## 45 -1.138280881 -0.92728240 R
## 46 -0.860055136 -1.28443852 R
## 47 -1.125398710 0.66990478 R
## 48 -0.186674974 -1.39954451 R
## 49 -0.008605128 -1.57143555 R
## 50 -1.348878827 -0.41384379 R
# Tagging every row with the resampling/model identifier
PMA_PreModelling_Train_LR_US_NEARMISS$Label <-
  rep_len("LR_US_NEARMISS", nrow(PMA_PreModelling_Train_LR_US_NEARMISS))
##################################
# Checking how many observations
# of each class remain after US_NEARMISS
##################################
table(PMA_PreModelling_Train_LR_US_NEARMISS[["Class"]])
##
## M R
## 25 25
##################################
# Fitting the Logistic Regression model
# of Class on V1 and V11
##################################
LR_US_NEARMISS_Model <- glm(
  formula = Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_US_NEARMISS
)
##################################
# Summarizing the fitted model
##################################
summary(LR_US_NEARMISS_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_NEARMISS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.01997 -0.52262 -0.00514 0.64473 2.00409
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5276 0.4512 -1.169 0.2423
## V1 -1.3388 0.5264 -2.543 0.0110 *
## V11 -1.2227 0.4795 -2.550 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.315 on 49 degrees of freedom
## Residual deviance: 40.340 on 47 degrees of freedom
## AIC: 46.34
##
## Number of Fisher Scoring iterations: 5
# Tabulating the fitted coefficients together with their names and a model label
LR_US_NEARMISS_Model_Coef <- as.data.frame(LR_US_NEARMISS_Model$coefficients)
LR_US_NEARMISS_Model_Coef$Coef <- rownames(LR_US_NEARMISS_Model_Coef)
LR_US_NEARMISS_Model_Coef$Model <- rep("LR_US_NEARMISS", nrow(LR_US_NEARMISS_Model_Coef))
colnames(LR_US_NEARMISS_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() spells the argument `row.names`; the former `rownames=FALSE`
# was silently ignored, so the row names were still printed next to the
# identical Coefficients column.
print(LR_US_NEARMISS_Model_Coef, row.names = FALSE)
## Estimates Coefficients Model
## -0.5275579 (Intercept) LR_US_NEARMISS
## -1.3388434 V1 LR_US_NEARMISS
## -1.2227451 V11 LR_US_NEARMISS
##################################
# Obtaining the model predictions
# on the response (probability) scale
##################################
LR_US_NEARMISS_Model_Probabilities <- predict(LR_US_NEARMISS_Model, type = "response")
# Echoing the stored values to the console
LR_US_NEARMISS_Model_Probabilities
## 1 2 3 4 5 6
## 0.538475404 0.060711209 0.663604862 0.245308514 0.788060819 0.355268290
## 7 8 9 10 11 12
## 0.100700058 0.006710875 0.061748607 0.021344245 0.007207500 0.007885623
## 13 14 15 16 17 18
## 0.123761916 0.128955243 0.057473709 0.617167616 0.869988474 0.440253186
## 19 20 21 22 23 24
## 0.725681187 0.165669455 0.018666733 0.024575272 0.170626580 0.266350095
## 25 26 27 28 29 30
## 0.119871133 0.994423044 0.697049432 0.913509256 0.786141426 0.672873462
## 31 32 33 34 35 36
## 0.309048509 0.851010973 0.554141527 0.971220746 0.285695645 0.965701640
## 37 38 39 40 41 42
## 0.826690849 0.954366549 0.516626398 0.941397563 0.732532124 0.134231886
## 43 44 45 46 47 48
## 0.516448710 0.990581586 0.893808927 0.899751107 0.539927799 0.807470453
## 49 50
## 0.803043836 0.856239957
##################################
# Deriving the classification index:
# the linear predictor (logit scale)
# returned by the fitted model
##################################
LR_US_NEARMISS_Model_Indices <- predict(LR_US_NEARMISS_Model, type = "link")
# Echoing the stored values to the console
LR_US_NEARMISS_Model_Indices
## 1 2 3 4 5 6
## 0.15420647 -2.73899464 0.67940041 -1.12379238 1.31327592 -0.59596102
## 7 8 9 10 11 12
## -2.18947025 -4.99729239 -2.72094650 -3.82539779 -4.92539958 -4.83479722
## 13 14 15 16 17 18
## -1.95727815 -1.91022797 -2.79723621 0.47754340 1.90085685 -0.24013458
## 19 20 21 22 23 24
## 0.97281980 -1.61663509 -3.96216914 -3.68113228 -1.58119307 -1.01322036
## 25 26 27 28 29 30
## -1.99365106 5.18351957 0.83328668 2.35725611 1.30182178 0.72121023
## 31 32 33 34 35 36
## -0.80457137 1.74255237 0.21741854 3.51889902 -0.91638207 3.33775738
## 37 38 39 40 41 42
## 1.56235380 3.04040680 0.06653012 2.77658926 1.00750772 -1.86404831
## 43 44 45 46 47 48
## 0.06581859 4.65562548 2.13025197 2.19446215 0.16005199 1.43365683
## 49 50
## 1.40542811 1.78440512
# Reporting the extremes of the classification index
max(LR_US_NEARMISS_Model_Indices)
## [1] 5.18352
min(LR_US_NEARMISS_Model_Indices)
## [1] -4.997292
##################################
# Gathering the modelling data, the predicted
# probabilities and the classification index
# into a single data frame
##################################
LR_US_NEARMISS_Model_Predictions <-
  as.data.frame(PMA_PreModelling_Train_LR_US_NEARMISS)
LR_US_NEARMISS_Model_Predictions$LR_US_NEARMISS_Prob <-
  LR_US_NEARMISS_Model_Probabilities
LR_US_NEARMISS_Model_Predictions$LR_US_NEARMISS_LP <-
  LR_US_NEARMISS_Model_Indices
LR_US_NEARMISS_Model_Predictions$Class <-
  as.factor(LR_US_NEARMISS_Model_Predictions$Class)
# Tagging every row with the model identifier
LR_US_NEARMISS_Model_Predictions$Label <-
  rep_len("LR_US_NEARMISS", nrow(LR_US_NEARMISS_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_US_NEARMISS_Model_Predictions %>%
  ggplot(aes(x = LR_US_NEARMISS_LP,
             y = LR_US_NEARMISS_Prob,
             color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  xlab("Sonar Object Classification Index (Logit Values)") +
  ylab("Estimated Rock Detection Probability") +
  labs(color = "Class") +
  # The largest classification index for this model is 5.18352, so the x axis
  # runs to 6; an upper limit of 5 made ggplot2 drop that observation.
  scale_x_continuous(limits = c(-10, 6), breaks = seq(-10, 6, by = 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.1), labels = scales::percent) +
  ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_NEARMISS)") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
##################################
# Scatter plot of the imbalanced data set
# (reference view for the Tomek Links comparison)
##################################
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, colour = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  scale_y_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  ggtitle("Without Undersampling - Tomek Links") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(colour = "black", size = 14, face = "bold", hjust = 0.5),
        axis.title.x = element_text(colour = "black", size = 12, face = "bold"),
        axis.title.y = element_text(colour = "black", size = 12, face = "bold"))
##################################
# Implementing US_TOMEK
# Visualizing the undersampled data using US_TOMEK
##################################
# The Tomek Links recipe is prepared once and reused for both the plot and the
# modelling data; the fixed seed keeps the undersampling reproducible.
US_TOMEK <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_tomek(Class, seed = 123456789) %>%
  prep()
# Plotting the undersampled training set stored in the prepared recipe
US_TOMEK %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Undersampling - Tomek Links") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
# Extracting the undersampled training set as a plain data frame
# (this statement was previously fused onto the end of the plot call)
PMA_PreModelling_Train_LR_US_TOMEK <- US_TOMEK %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_US_TOMEK <- as.data.frame(PMA_PreModelling_Train_LR_US_TOMEK))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.738227771 -1.15770286 M
## 11 1.169078303 -0.79246144 M
## 12 0.118911997 -1.30982705 M
## 13 1.012209983 -0.76338471 M
## 14 0.207991857 -0.39104999 M
## 15 -0.957451233 0.02573746 M
## 16 0.695599819 -0.11827986 M
## 17 -0.068999246 0.13149291 M
## 18 0.527094931 -0.39650480 M
## 19 0.104263091 0.33286517 M
## 20 0.465616266 -0.56497212 M
## 21 -0.507041623 -0.24127887 M
## 22 0.043933590 0.30731955 M
## 23 0.147716923 -0.20173692 M
## 24 0.612463710 0.84401925 M
## 25 -0.068999246 0.63930609 M
## 26 1.395977603 1.55085071 M
## 27 -0.180527407 1.02812486 M
## 28 0.565359423 0.74012531 M
## 29 2.230423635 1.21329169 M
## 30 0.782380194 0.93715691 M
## 31 2.005116912 0.95072168 M
## 32 1.784287809 0.74337479 M
## 33 1.523834205 1.62207084 M
## 34 1.928176420 1.77732114 M
## 35 1.934080462 1.47897609 M
## 36 0.744203377 2.70773324 M
## 37 1.424344244 -0.63319889 M
## 38 0.689392711 -0.83030322 M
## 39 1.381555338 -0.09258957 M
## 40 1.002499594 0.07158504 M
## 41 0.977896818 0.15071601 M
## 42 0.278484686 -2.26945660 M
## 43 1.406688795 0.06372536 M
## 44 1.852613244 0.04399440 M
## 45 1.281675939 -0.27257795 M
## 46 1.390588252 0.33359231 M
## 47 -0.192852168 -0.57752146 M
## 48 -0.180527407 -0.62433567 M
## 49 -0.052243905 -0.17198030 M
## 50 -0.779287302 -0.12994977 M
## 51 -0.041194136 -1.17696945 M
## 52 -2.093160440 0.59563328 M
## 53 -0.230553865 -0.22058604 M
## 54 -1.038676203 0.48516249 M
## 55 -0.030238811 0.37335493 M
## 56 0.089444831 0.04161877 M
## 57 0.303951059 -0.21715050 M
## 58 -0.132380328 -0.09011428 M
## 59 -1.546463816 0.01457488 M
## 60 -0.499153812 0.87301303 M
## 61 -0.108968439 0.07550792 M
## 62 0.686277848 -1.02337267 M
## 63 -0.721885235 -1.35101476 M
## 64 -3.557061230 -0.28132844 M
## 65 -0.789109306 -0.36302243 M
## 66 -0.750264961 -0.03937819 M
## 67 -0.379134945 2.23365699 M
## 68 -0.371994173 0.60103638 M
## 69 0.401039618 0.45156422 M
## 70 -0.295964244 1.50754826 M
## 71 0.256869245 1.19827720 M
## 72 1.243886483 1.44694263 M
## 73 -0.230553865 0.36255507 M
## 74 -0.256318859 -0.63813480 M
## 75 0.632133128 0.46700244 M
## 76 0.324793230 1.73570326 M
## 77 0.723165726 0.69367752 M
## 78 -1.191144673 1.19711994 M
## 79 -1.177722925 1.07730973 M
## 80 0.142960831 2.42255907 M
## 81 0.443194144 1.74741590 M
## 82 -0.180527407 0.75375436 M
## 83 0.544629613 0.36615870 M
## 84 -1.274767709 0.41409622 M
## 85 -0.539087424 -0.07530230 M
## 86 -0.555415865 0.01776805 M
## 87 0.345301912 -0.27257795 M
## 88 -1.868589923 0.02175517 M
## 89 -0.217863016 -0.21200423 M
## 90 0.625610136 0.17668535 M
## 91 -0.799007156 -0.01833539 M
## 92 -2.013850697 0.05347991 M
## 93 0.537644499 -0.19150300 M
## 94 -0.013979414 0.15531332 M
## 95 -0.935159101 0.53154288 M
## 96 -0.779287302 0.73036007 M
## 97 0.424204681 0.54321732 M
## 98 0.157176488 1.02691760 M
## 99 -0.323092564 0.48794823 M
## 100 0.377448180 0.47190071 M
## 101 0.992715078 0.20396774 M
## 102 0.295518362 0.33867807 M
## 103 -2.902302684 -1.49282840 R
## 104 -0.597170869 -1.70542147 R
## 105 -0.289270938 -1.17938825 R
## 106 -0.316256467 -0.26298086 R
## 107 -1.638716948 -1.51501038 R
## 108 0.274190661 0.01776805 R
## 109 -0.860055136 -2.21946189 R
## 110 -0.789109306 -0.84516218 R
## 111 -1.427539411 -1.35491277 R
## 112 0.779483548 -1.33935885 R
## 113 -0.400799099 -2.26338258 R
## 114 -0.302692686 -0.92399268 R
## 115 -0.829167731 0.42261407 R
## 116 -2.434915803 -1.57286451 R
## 117 -1.138280881 -0.92728240 R
## 118 -0.008605128 -1.57143555 R
# Tagging every row with the resampling/model identifier
PMA_PreModelling_Train_LR_US_TOMEK$Label <-
  rep_len("LR_US_TOMEK", nrow(PMA_PreModelling_Train_LR_US_TOMEK))
##################################
# Checking how many observations
# of each class remain after US_TOMEK
##################################
table(PMA_PreModelling_Train_LR_US_TOMEK[["Class"]])
##
## M R
## 102 16
##################################
# Fitting the Logistic Regression model
# of Class on V1 and V11
##################################
LR_US_TOMEK_Model <- glm(
  formula = Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_US_TOMEK
)
##################################
# Summarizing the fitted model
##################################
summary(LR_US_TOMEK_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_TOMEK)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.88129 -0.38305 -0.18048 -0.04904 2.61741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.1192 0.5681 -5.491 4.00e-08 ***
## V1 -0.8550 0.3663 -2.334 0.0196 *
## V11 -2.1768 0.5199 -4.187 2.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 93.664 on 117 degrees of freedom
## Residual deviance: 51.097 on 115 degrees of freedom
## AIC: 57.097
##
## Number of Fisher Scoring iterations: 7
# Tabulating the fitted coefficients together with their names and a model label
LR_US_TOMEK_Model_Coef <- as.data.frame(LR_US_TOMEK_Model$coefficients)
LR_US_TOMEK_Model_Coef$Coef <- rownames(LR_US_TOMEK_Model_Coef)
LR_US_TOMEK_Model_Coef$Model <- rep("LR_US_TOMEK", nrow(LR_US_TOMEK_Model_Coef))
colnames(LR_US_TOMEK_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() spells the argument `row.names`; the former `rownames=FALSE`
# was silently ignored, so the row names were still printed next to the
# identical Coefficients column.
print(LR_US_TOMEK_Model_Coef, row.names = FALSE)
## Estimates Coefficients Model
## -3.1192250 (Intercept) LR_US_TOMEK
## -0.8549776 V1 LR_US_TOMEK
## -2.1767958 V11 LR_US_TOMEK
##################################
# Obtaining the model predictions
# on the response (probability) scale
##################################
LR_US_TOMEK_Model_Probabilities <- predict(LR_US_TOMEK_Model, type = "response")
# Echoing the stored values to the console
LR_US_TOMEK_Model_Probabilities
## 1 2 3 4 5 6
## 3.756409e-01 3.235729e-03 7.183859e-02 2.697569e-04 5.004968e-04 4.923252e-03
## 7 8 9 10 11 12
## 2.835075e-02 3.922375e-03 8.317828e-02 2.261271e-01 8.365176e-02 4.086217e-01
## 13 14 15 16 17 18
## 8.924332e-02 7.974495e-02 8.653858e-02 3.057631e-02 3.401110e-02 6.257509e-02
## 19 20 21 22 23 24
## 1.920963e-02 9.216455e-02 1.033540e-01 2.133670e-02 5.698041e-02 4.151500e-03
## 25 26 27 28 29 30
## 1.152242e-02 4.577910e-04 5.470520e-03 5.412073e-03 4.676720e-04 2.934903e-03
## 31 32 33 34 35 36
## 1.003676e-03 1.902036e-03 3.514870e-04 1.774511e-04 3.379585e-04 6.444174e-05
## 37 38 39 40 41 42
## 4.932868e-02 1.299682e-01 1.632055e-02 1.579461e-02 1.360818e-02 8.296036e-01
## 43 44 45 46 47 48
## 1.142309e-02 8.171200e-03 2.604136e-02 6.468579e-03 1.548320e-01 1.671688e-01
## 49 50 51 52 53 54
## 6.296205e-02 1.024699e-01 3.723887e-01 6.747167e-02 8.003027e-02 3.601015e-02
## 55 56 57 58 59 60
## 1.972224e-02 3.604439e-02 5.183789e-02 5.679248e-02 1.383861e-01 1.002269e-02
## 61 62 63 64 65 66
## 3.952765e-02 1.856837e-01 6.079859e-01 6.305035e-01 1.605248e-01 8.378117e-02
## 67 68 69 70 71 72
## 4.723640e-04 1.615066e-02 1.160016e-02 2.133604e-03 2.606214e-03 6.535624e-04
## 73 74 75 76 77 78
## 2.386204e-02 1.807923e-01 9.227894e-03 7.647569e-04 5.233073e-03 8.953573e-03
## 79 80 81 82 83 84
## 1.145983e-02 2.004250e-04 6.737929e-04 9.896313e-03 1.234693e-02 5.065520e-02
## 85 86 87 88 89 90
## 7.625172e-02 6.398192e-02 5.619436e-02 1.723620e-01 7.788281e-02 1.731500e-02
## 91 92 93 94 95 96
## 8.346450e-02 1.803700e-01 4.061963e-02 3.090759e-02 2.998119e-02 1.724570e-02
## 97 98 99 100 101 102
## 9.337053e-03 4.114982e-03 1.974037e-02 1.132709e-02 1.198585e-02 1.615689e-02
## 103 104 105 106 107 108
## 9.316147e-01 7.509522e-01 4.244291e-01 9.309838e-02 8.291634e-01 3.253605e-02
## 109 110 111 112 113 114
## 9.203724e-01 3.532443e-01 7.409012e-01 2.952220e-01 8.957034e-01 2.996246e-01
## 115 116 117 118
## 3.454790e-02 9.157827e-01 4.681641e-01 5.765988e-01
##################################
# Deriving the classification index:
# the linear predictor (logit scale)
# returned by the fitted model
##################################
LR_US_TOMEK_Model_Indices <- predict(LR_US_TOMEK_Model, type = "link")
# Echoing the stored values to the console
LR_US_TOMEK_Model_Indices
## 1 2 3 4 5 6 7
## -0.5080921 -5.7302601 -2.5587838 -8.2177196 -7.5994087 -5.3088505 -3.5343414
## 8 9 10 11 12 13 14
## -5.5371279 -2.3999268 -1.2303105 -2.3937341 -0.3696661 -2.3229093 -2.4458174
## 15 16 17 18 19 20 21
## -2.3566509 -3.4564762 -3.3464654 -2.7067694 -3.9329471 -2.2874876 -2.1605009
## 22 23 24 25 26 27 28
## -3.8257592 -2.8063796 -5.4801254 -4.4518710 -7.6886399 -5.2028960 -5.2136964
## 29 30 31 32 33 34 35
## -7.6672756 -5.8281418 -6.9030821 -6.2629263 -7.9529862 -8.6366379 -7.9922495
## 36 37 38 39 40 41 42
## -9.6496846 -2.9586628 -1.9012398 -4.0988753 -4.1321657 -4.2833829 1.5828204
## 43 44 45 46 47 48 49
## -4.4606296 -4.7989347 -3.6216827 -5.0343092 -1.6971944 -1.6058269 -2.7001917
## 50 51 52 53 54 55 56
## -2.1700777 -0.5219828 -2.6261917 -2.4419359 -3.2872798 -3.9060890 -3.2862939
## 57 58 59 60 61 62 63
## -2.9064041 -2.8098824 -1.8287596 -4.5928308 -3.1904248 -1.4783039 0.4388540
## 64 65 66 67 68 69 70
## 0.5343773 -1.6543285 -2.3920470 -7.6572883 -4.1095118 -4.4450680 -6.1478070
## 71 72 73 74 75 76 77
## -5.9472473 -7.3324188 -3.7113150 -1.5109890 -4.6762536 -7.1751876 -5.2475099
## 78 79 80 81 82 83 84
## -4.7067087 -4.4573816 -8.5148698 -7.3019137 -4.6056475 -4.3819239 -2.9307301
## 85 86 87 88 89 90 91
## -2.4943996 -2.6830343 -2.8211039 -1.5689790 -2.4714671 -4.0387156 -2.3961794
## 92 93 94 95 96 97 98
## -1.5138426 -3.1620361 -3.4453583 -3.4767452 -4.0427966 -4.6643837 -5.4889973
## 99 100 101 102 103 104 105
## -3.9051518 -4.4691663 -4.4119703 -4.1091196 2.6117614 1.1036970 -0.3046174
## 106 107 108 109 110 111 112
## -2.2763772 1.5797095 -3.3923293 2.4474182 -0.6048087 1.0506577 -0.8701553
## 113 114 115 116 117 118
## 2.1503710 -0.8490861 -3.3302497 2.3863784 -0.1275159 0.3088265
# Reporting the extremes of the classification index
max(LR_US_TOMEK_Model_Indices)
## [1] 2.611761
min(LR_US_TOMEK_Model_Indices)
## [1] -9.649685
##################################
# Gathering the modelling data, the predicted
# probabilities and the classification index
# into a single data frame
##################################
LR_US_TOMEK_Model_Predictions <-
  as.data.frame(PMA_PreModelling_Train_LR_US_TOMEK)
LR_US_TOMEK_Model_Predictions$LR_US_TOMEK_Prob <-
  LR_US_TOMEK_Model_Probabilities
LR_US_TOMEK_Model_Predictions$LR_US_TOMEK_LP <-
  LR_US_TOMEK_Model_Indices
LR_US_TOMEK_Model_Predictions$Class <-
  as.factor(LR_US_TOMEK_Model_Predictions$Class)
# Tagging every row with the model identifier
LR_US_TOMEK_Model_Predictions$Label <-
  rep_len("LR_US_TOMEK", nrow(LR_US_TOMEK_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_US_TOMEK_Model_Predictions %>%
  ggplot(aes(x = LR_US_TOMEK_LP,
             y = LR_US_TOMEK_Prob,
             color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  xlab("Sonar Object Classification Index (Logit Values)") +
  ylab("Estimated Rock Detection Probability") +
  labs(color = "Class") +
  # Exactly one x scale and one y scale: a second pair with limits c(-4, 4)
  # would only be replaced by these and make ggplot2 emit
  # "Scale for x is already present" messages.
  scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.1), labels = scales::percent) +
  ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_TOMEK)") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  # Same fixed axes as the companion "With Oversampling" plot (and the other
  # "Without ..." scatter plots) so the before/after views are comparable.
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "Without Oversampling - Adaptive Synthetic Algorithm") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
##################################
# Implementing OS_ADASYN
# Visualizing the oversampled data using OS_ADASYN
##################################
# The ADASYN recipe is prepared once and reused for both the plot and the
# modelling data; the fixed seed keeps the oversampling reproducible.
OS_ADASYN <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_adasyn(Class, seed = 123456789) %>%
  prep()
# Plotting the oversampled training set stored in the prepared recipe
OS_ADASYN %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Oversampling - Adaptive Synthetic Algorithm") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
# Extracting the oversampled training set as a plain data frame
# (this statement was previously fused onto the end of the plot call)
PMA_PreModelling_Train_LR_OS_ADASYN <- OS_ADASYN %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_OS_ADASYN <- as.data.frame(PMA_PreModelling_Train_LR_OS_ADASYN))
## V1 V11 Class
## 1 0.914240973 -1.558615871 M
## 2 2.175018299 0.345206089 M
## 3 -0.230553865 -0.166907171 M
## 4 1.231717734 1.858421213 M
## 5 0.424204681 1.891540835 M
## 6 1.143139503 0.556904232 M
## 7 -0.507041623 0.389850825 M
## 8 0.312328639 0.988089406 M
## 9 -0.935159101 0.036862390 M
## 10 0.408807916 -1.630790485 M
## 11 0.738227771 -1.157702863 M
## 12 1.169078303 -0.792461442 M
## 13 0.118911997 -1.309827049 M
## 14 -0.174409184 -1.318776567 M
## 15 1.012209983 -0.763384712 M
## 16 0.813818599 0.266052702 M
## 17 0.207991857 -0.391049992 M
## 18 -0.957451233 0.025737464 M
## 19 0.695599819 -0.118279862 M
## 20 -1.204708453 -0.186398477 M
## 21 -0.068999246 0.131492907 M
## 22 0.527094931 -0.396504803 M
## 23 0.104263091 0.332865171 M
## 24 0.465616266 -0.564972117 M
## 25 -0.507041623 -0.241278874 M
## 26 0.043933590 0.307319548 M
## 27 0.147716923 -0.201736917 M
## 28 0.612463710 0.844019251 M
## 29 -0.068999246 0.639306087 M
## 30 1.395977603 1.550850706 M
## 31 -0.180527407 1.028124858 M
## 32 0.565359423 0.740125313 M
## 33 2.230423635 1.213291690 M
## 34 0.782380194 0.937156911 M
## 35 2.005116912 0.950721679 M
## 36 1.784287809 0.743374785 M
## 37 1.523834205 1.622070841 M
## 38 1.928176420 1.777321144 M
## 39 -1.204708453 0.652640830 M
## 40 1.934080462 1.478976095 M
## 41 0.744203377 2.707733238 M
## 42 1.424344244 -0.633198890 M
## 43 -0.491314245 -0.582362640 M
## 44 0.689392711 -0.830303223 M
## 45 1.381555338 -0.092589575 M
## 46 1.002499594 0.071585037 M
## 47 0.977896818 0.150716010 M
## 48 0.278484686 -2.269456602 M
## 49 1.406688795 0.063725362 M
## 50 1.852613244 0.043994401 M
## 51 1.281675939 -0.272577947 M
## 52 1.390588252 0.333592312 M
## 53 -0.192852168 -0.577521460 M
## 54 -0.180527407 -0.624335667 M
## 55 -0.052243905 -0.171980296 M
## 56 -0.779287302 -0.129949769 M
## 57 -0.041194136 -1.176969454 M
## 58 -0.924156756 -1.249334984 M
## 59 -0.168320027 -1.801733345 M
## 60 -2.093160440 0.595633277 M
## 61 -0.230553865 -0.220586042 M
## 62 -1.038676203 0.485162490 M
## 63 -0.030238811 0.373354928 M
## 64 0.089444831 0.041618772 M
## 65 0.303951059 -0.217150498 M
## 66 -0.132380328 -0.090114281 M
## 67 -1.546463816 0.014574884 M
## 68 -0.499153812 0.873013033 M
## 69 -0.108968439 0.075507923 M
## 70 0.686277848 -1.023372673 M
## 71 -0.721885235 -1.351014760 M
## 72 -3.557061230 -0.281328435 M
## 73 -0.789109306 -0.363022432 M
## 74 -0.750264961 -0.039378188 M
## 75 -0.379134945 2.233656987 M
## 76 -0.371994173 0.601036378 M
## 77 0.401039618 0.451564218 M
## 78 -0.295964244 1.507548259 M
## 79 0.256869245 1.198277196 M
## 80 1.243886483 1.446942626 M
## 81 -0.230553865 0.362555066 M
## 82 -0.256318859 -0.638134800 M
## 83 0.632133128 0.467002438 M
## 84 0.324793230 1.735703263 M
## 85 0.723165726 0.693677522 M
## 86 -1.191144673 1.197119945 M
## 87 -1.177722925 1.077309728 M
## 88 0.142960831 2.422559073 M
## 89 0.443194144 1.747415902 M
## 90 -0.180527407 0.753754356 M
## 91 0.544629613 0.366158697 M
## 92 -1.274767709 0.414096217 M
## 93 -0.539087424 -0.075302304 M
## 94 -0.555415865 0.017768054 M
## 95 0.345301912 -0.272577947 M
## 96 -1.868589923 0.021755168 M
## 97 -0.217863016 -0.212004228 M
## 98 0.625610136 0.176685353 M
## 99 -0.799007156 -0.018335390 M
## 100 -2.013850697 0.053479912 M
## 101 0.537644499 -0.191503002 M
## 102 -0.013979414 0.155313323 M
## 103 -0.935159101 0.531542880 M
## 104 -0.779287302 0.730360068 M
## 105 0.424204681 0.543217319 M
## 106 0.157176488 1.026917595 M
## 107 -0.323092564 0.487948233 M
## 108 0.377448180 0.471900708 M
## 109 0.992715078 0.203967739 M
## 110 0.295518362 0.338678072 M
## 111 0.099342681 0.206985690 M
## 112 -2.902302684 -1.492828399 R
## 113 -0.588710342 -0.468334419 R
## 114 -0.597170869 -1.705421467 R
## 115 -0.289270938 -1.179388252 R
## 116 0.544629613 -1.617624095 R
## 117 0.064360561 0.156078935 R
## 118 -0.168320027 -1.672266879 R
## 119 -0.316256467 -0.262980859 R
## 120 -1.638716948 -1.515010380 R
## 121 0.274190661 0.017768054 R
## 122 -0.860055136 -2.219461886 R
## 123 -0.789109306 -0.845162176 R
## 124 -1.427539411 -1.354912772 R
## 125 0.779483548 -1.339358851 R
## 126 -0.400799099 -2.263382579 R
## 127 -0.302692686 -0.923992684 R
## 128 0.827857790 0.186562547 R
## 129 -0.829167731 0.422614069 R
## 130 -2.434915803 -1.572864509 R
## 131 -1.138280881 -0.927282397 R
## 132 -0.860055136 -1.284438518 R
## 133 -1.125398710 0.669904780 R
## 134 -0.186674974 -1.399544510 R
## 135 -0.008605128 -1.571435552 R
## 136 -1.348878827 -0.413843793 R
## 137 -0.390322558 -0.784388451 R
## 138 -0.295860799 -1.053993156 R
## 139 -0.017377454 -1.559181951 R
## 140 -0.191911171 -1.315384987 R
## 141 -0.290855568 -1.149235140 R
## 142 -0.035273087 -1.662069728 R
## 143 0.346506687 -1.632808882 R
## 144 0.682389831 -1.454399746 R
## 145 -0.356332622 -0.246153717 R
## 146 0.266972171 0.022526169 R
## 147 0.652376435 0.179556227 R
## 148 -0.342166539 0.277343988 R
## 149 0.104992266 0.129296282 R
## 150 0.128805610 0.158651987 R
## 151 -0.082002824 0.016138154 R
## 152 -0.096321170 0.002448115 R
## 153 -0.178557486 -1.673058340 R
## 154 -0.063390608 -1.606022761 R
## 155 -0.176865876 -1.545290531 R
## 156 -0.180838432 -1.486265301 R
## 157 -0.358706919 -2.156356393 R
## 158 -0.349557028 -0.303980855 R
## 159 -0.582661828 -0.590981902 R
## 160 0.038582729 -0.094260047 R
## 161 0.035702482 0.124526356 R
## 162 -0.574843937 -0.457883052 R
## 163 0.082506030 -0.073375165 R
## 164 -0.529153404 -0.423445246 R
## 165 -0.390201423 -0.354022656 R
## 166 -0.059449742 0.019763646 R
## 167 -0.557747942 -0.560307667 R
## 168 -0.308189318 -0.656122036 R
## 169 0.219359952 0.053910074 R
## 170 0.748334227 0.162318486 R
## 171 0.167225292 0.088274976 R
## 172 0.248268872 0.034854570 R
## 173 0.755598258 0.164533045 R
## 174 0.219609003 -0.008184720 R
## 175 -0.130244888 -0.642472049 R
## 176 -0.784077436 -2.070896097 R
## 177 -0.647882130 -1.804581595 R
## 178 -0.512560514 -2.252694339 R
## 179 -0.807753229 -0.960600033 R
## 180 -0.318364841 -0.921452795 R
## 181 -0.557028431 -1.000347302 R
## 182 -1.342933236 -1.229833763 R
## 183 -1.325016764 -1.203346635 R
## 184 -0.862607677 -0.903846499 R
## 185 0.045146002 -1.229443601 R
## 186 0.437074722 -1.360688788 R
## 187 -0.077403279 -1.640333136 R
## 188 -0.125638032 -1.657275187 R
## 189 -0.517275875 -0.889216525 R
## 190 -0.294167245 -1.086218926 R
## 191 -0.310113401 -0.562354563 R
## 192 -0.297566301 -1.021540043 R
## 193 -0.310870399 -0.525463291 R
## 194 0.703852721 0.148757571 R
## 195 -0.050300971 -0.158482083 R
## 196 0.450144116 0.038151940 R
## 197 0.112591704 -0.144113810 R
## 198 0.727653830 0.156013729 R
## 199 0.205014899 -0.058163811 R
## 200 0.820894004 -0.033103728 R
## 201 -0.725082580 0.036955271 R
## 202 -0.655946484 -0.219209453 R
## 203 -0.723124380 0.280868725 R
## 204 -1.121817738 0.666915420 R
## 205 -1.106401172 -0.023584008 R
## 206 -0.157000177 0.251678836 R
## 207 -1.123828064 0.666573667 R
## 208 -0.938882051 0.514202506 R
## 209 -0.930462819 0.256473662 R
## 210 -0.872827627 0.459060921 R
## 211 -0.171945746 -1.413762652 R
## 212 -0.180283482 -1.405714226 R
## 213 -0.016300987 -1.560685612 R
## 214 -0.083518808 -1.618730111 R
## 215 -0.505285997 -1.684504044 R
## 216 -1.174123534 -0.839897872 R
## 217 -1.279860500 -0.582110741 R
## 218 -1.377082750 -0.751266116 R
## 219 -0.970648251 -0.705281241 R
## 220 -1.190222435 -0.536093069 R
## 221 -1.196659723 -0.784954555 R
## 222 -1.253287340 -0.646896207 R
# Tagging every row of the OS_ADASYN oversampled train set
# with a constant method label
PMA_PreModelling_Train_LR_OS_ADASYN[["Label"]] <- rep_len(
  "LR_OS_ADASYN",
  nrow(PMA_PreModelling_Train_LR_OS_ADASYN)
)
##################################
# Counting the rows per Class level
# in the data oversampled using OS_ADASYN
##################################
table(PMA_PreModelling_Train_LR_OS_ADASYN[["Class"]]) ##
## M R
## 111 111
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
# Fitting a binomial GLM (logistic regression) of Class on V1 and V11
# using the OS_ADASYN oversampled train set.
# NOTE(review): with a factor response glm() treats the first level as
# "failure", so fitted probabilities refer to the other level
# (presumably R, given levels M/R - confirm the level order).
LR_OS_ADASYN_Model <- glm(
  formula = Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_OS_ADASYN
)
##################################
# Summarizing the fitted model
# (coefficients, deviances and AIC)
##################################
summary(LR_OS_ADASYN_Model)##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_ADASYN)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.19658 -0.90252 0.05818 0.80875 1.68379
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4101 0.1704 -2.406 0.0161 *
## V1 -0.5944 0.2092 -2.841 0.0045 **
## V11 -1.2753 0.2148 -5.938 2.89e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 238.51 on 219 degrees of freedom
## AIC: 244.51
##
## Number of Fisher Scoring iterations: 4
# Collecting the fitted coefficients into a three-column data frame:
# the estimate, the coefficient name and a constant model label
LR_OS_ADASYN_Model_Coef <- as.data.frame(coef(LR_OS_ADASYN_Model))
LR_OS_ADASYN_Model_Coef$Coef <- rownames(LR_OS_ADASYN_Model_Coef)
LR_OS_ADASYN_Model_Coef$Model <- rep("LR_OS_ADASYN", nrow(LR_OS_ADASYN_Model_Coef))
colnames(LR_OS_ADASYN_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() takes `row.names`, not `rownames`; the misspelled
# argument was silently ignored, so the row names were printed next to the
# identical Coefficients column. The echoed output that follows was captured
# before this fix and still shows the row-name column.
print(LR_OS_ADASYN_Model_Coef, row.names = FALSE)## Estimates Coefficients Model
## (Intercept) -0.4101181 (Intercept) LR_OS_ADASYN
## V1 -0.5943822 V1 LR_OS_ADASYN
## V11 -1.2753114 V11 LR_OS_ADASYN
##################################
# Computing the model predictions
##################################
# Predicting on the response scale (probabilities); no newdata is supplied,
# so the values are the fitted ones for the rows the model was trained on.
# The outer parentheses print the assigned vector.
(LR_OS_ADASYN_Model_Probabilities <- predict(
  object = LR_OS_ADASYN_Model,
  type = "response"
))## 1 2 3 4 5 6 7
## 0.73772818 0.10497255 0.48494896 0.02896442 0.04416885 0.14187466 0.35298982
## 8 9 10 11 12 13 14
## 0.13518371 0.52465818 0.80638030 0.65192786 0.47642692 0.76667670 0.79825096
## 15 16 17 18 19 20 21
## 0.49045006 0.22563277 0.49124234 0.53149589 0.33788658 0.63266232 0.36893216
## 22 23 24 25 26 27 28
## 0.44577737 0.28975032 0.50841003 0.54957759 0.30403436 0.44012839 0.13580915
## 29 30 31 32 33 34 35
## 0.23425830 0.03850579 0.16603467 0.15576953 0.03615320 0.11201542 0.05655150
## 36 37 38 39 40 41 42
## 0.08175770 0.03278329 0.02139795 0.37136132 0.03089268 0.01331233 0.38955167
## 43 44 45 46 47 48 49
## 0.65126495 0.55947074 0.24727303 0.25025003 0.23441048 0.91040741 0.20957344
## 50 51 52 53 54 55 56
## 0.17258950 0.30485130 0.15948123 0.60850432 0.62090861 0.46015049 0.55448352
## 57 58 59 60 61 62 63
## 0.75312881 0.84972878 0.87949954 0.51859174 0.50205872 0.39855712 0.29561056
## 64 65 66 67 68 69 70
## 0.37370392 0.42217609 0.44608300 0.62022067 0.22674314 0.39134899 0.61942054
## 71 72 73 74 75 76 77
## 0.85093106 0.88724633 0.62758752 0.52149811 0.04594066 0.27777607 0.22716764
## 78 79 80 81 82 83 84
## 0.10369789 0.10997728 0.04766295 0.32399955 0.63553899 0.20078099 0.05642670
## 85 86 87 88 89 90 91
## 0.15127834 0.22638738 0.25274867 0.02699838 0.05205260 0.22027108 0.23133247
## 92 93 94 95 96 97 98
## 0.45499203 0.50158494 0.47436036 0.43346259 0.66212847 0.49743683 0.26750888
## 99 100 101 102 103 104 105
## 0.52203094 0.67231525 0.38096402 0.35437073 0.37001350 0.29351771 0.20504940
## 106 107 108 109 110 111 112
## 0.14025414 0.30145821 0.22508296 0.22092352 0.26547781 0.32450675 0.96153492
## 113 114 115 116 117 118 119
## 0.63113128 0.89281022 0.78004124 0.79069806 0.34357069 0.86087655 0.52828019
## 120 121 122 123 124 125 126
## 0.92386207 0.35531618 0.94938606 0.75708417 0.89718416 0.69734156 0.93788428
## 127 128 129 130 131 132 133
## 0.72074811 0.24230065 0.38787815 0.95448129 0.80984905 0.85057811 0.35536511
## 134 135 136 137 138 139 140
## 0.81543098 0.83188786 0.71492212 0.69470786 0.75211155 0.83042655 0.79922795
## 141 142 143 144 145 146 147
## 0.77353736 0.84947689 0.81248892 0.73867519 0.52886844 0.35490911 0.26369154
## 148 149 150 151 152 153 154
## 0.36344558 0.34583074 0.33425366 0.40565468 0.41193095 0.86172405 0.84233953
## 155 156 157 158 159 160 161
## 0.84100629 0.83101251 0.92778263 0.54619851 0.66594284 0.42242166 0.35660069
## 162 163 164 165 166 167 168
## 0.62609548 0.40960899 0.60931311 0.56790283 0.40131546 0.65384058 0.64790067
## 169 170 171 172 173 174 175
## 0.35222942 0.25694247 0.34930583 0.35385536 0.25558129 0.37046751 0.61931673
## 176 177 178 179 180 181 182
## 0.93684708 0.90690144 0.94088997 0.78500246 0.72196940 0.76794150 0.87615569
## 183 184 185 186 187 188 189
## 0.87125377 0.77821361 0.75601813 0.74372008 0.84913396 0.85546221 0.73718131
## 190 191 192 193 194 195 196
## 0.75951039 0.62043265 0.74450785 0.60939910 0.26538054 0.45559081 0.32600030
## 197 198 199 200 201 202 203
## 0.42720825 0.26084358 0.38750825 0.29821230 0.49343253 0.56447039 0.41617366
## 204 205 206 207 208 209 210
## 0.35575095 0.56895381 0.34575106 0.35612479 0.37570197 0.45409182 0.38301500
## 211 212 213 214 215 216 217
## 0.81683819 0.81604264 0.83060641 0.84604459 0.88477862 0.79557946 0.74894259
## 218 219 220 221 222
## 0.79681306 0.74388715 0.72730948 0.78620781 0.76129777
##################################
# Creating a classification index
# based on the model predictions
##################################
# Predicting on the link scale (log-odds / linear predictor) for the
# training rows; the outer parentheses print the assigned vector.
(LR_OS_ADASYN_Model_Indices <- predict(
  object = LR_OS_ADASYN_Model,
  type = "link"
))## 1 2 3 4 5 6
## 1.034194007 -2.143155516 -0.060222356 -3.512295020 -3.074561474 -1.799806198
## 7 8 9 10 11 12
## -0.605922800 -1.855882401 0.098712797 1.426659526 0.627524174 -0.094362275
## 13 14 15 16 17 18
## 1.189640156 1.375398467 -0.038204425 -1.233137433 -0.035034220 0.126150588
## 19 20 21 22 23 24
## -0.672726578 0.543655270 -0.536800580 -0.217746823 -0.896596981 0.033643295
## 25 26 27 28 29 30
## 0.198964127 -0.828159573 -0.240641004 -1.850543025 -1.184420539 -3.217679974
## 31 32 33 34 35 36
## -1.613995214 -1.690047948 -3.283166961 -2.070317881 -2.814390118 -2.418701365
## 37 38 39 40 41 42
## -3.384503515 -3.822829815 -0.526381159 -3.445856218 -4.305662512 -0.449197159
## 43 44 45 46 47 48
## 0.624604084 0.239014356 -1.113209442 -1.097279217 -1.183572402 2.318619541
## 49 50 51 52 53 54
## -1.327498549 -1.567384980 -0.824301675 -1.662093081 0.441029528 0.493406603
## 55 56 57 58 59 60
## -0.159736808 0.218802829 1.115369581 1.732475427 1.987699485 0.074401266
## 61 62 63 64 65 66
## 0.008234922 -0.411480728 -0.868288495 -0.516359404 -0.313846676 -0.216509810
## 67 68 69 70 71 72
## 0.490484945 -1.226793467 -0.441645313 0.487089454 1.741922218 2.062917134
## 73 74 75 76 77 78
## 0.521881088 0.086045493 -3.033375348 -0.955519951 -1.224373916 -2.156795762
## 79 80 81 82 83 84
## -2.090973219 -2.994764558 -0.735451606 0.556053884 -1.381420321 -2.816731639
## 85 86 87 88 89 90
## -1.724609805 -1.228823674 -1.084005982 -3.584608772 -2.902044296 -1.264087373
## 91 92 93 94 95 96
## -1.200802614 -0.180520511 0.006339761 -0.102648596 -0.267737627 0.672793868
## 97 98 99 100 101 102
## -0.010252778 -1.007298471 0.088180865 0.718675356 -0.485458441 -0.599881837
## 103 104 105 106 107 108
## -0.532158893 -0.878360151 -1.355029065 -1.813180759 -0.840363591 -1.236286943
## 109 110 111 112 113 114
## -1.260292650 -1.017688967 -0.733136834 3.218780081 0.537073094 2.119773149
## 115 116 117 118 119 120
## 1.265906734 1.329138280 -0.647422117 1.822589419 0.113241617 2.496016154
## 121 122 123 124 125 126
## -0.595751943 2.931588504 1.136759420 2.166321675 0.834670434 2.714627456
## 127 128 129 130 131 132
## 0.948175492 -1.140107373 -0.456240118 3.043044607 1.449029645 1.739142503
## 133 134 135 136 137 138
## -0.595538372 1.485693315 1.599066382 0.919411187 0.822222252 1.109905829
## 139 140 141 142 143 144
## 1.588653339 1.381476015 1.228394001 1.730504144 1.466264352 1.039094181
## 145 146 147 148 149 150
## 0.115602324 -0.597529479 -1.026869141 -0.560440357 -0.637416654 -0.689008549
## 151 152 153 154 155 156
## -0.381958248 -0.355988614 1.829683742 1.675739359 1.665734531 1.592820196
## 157 158 159 160 161 162
## 2.553116895 0.185322642 0.689891703 -0.312840064 -0.590148900 0.515502402
## 163 164 165 166 167 168
## -0.365582021 0.444425835 0.273299828 -0.399987029 0.635964130 0.609824089
## 169 170 171 172 173 174
## -0.609253877 -1.061921255 -0.622091916 -0.602135123 -1.069063118 -0.530211707
## 175 176 177 178 179 180
## 0.486649104 2.696961062 2.276375065 2.767425616 1.295060256 0.954251593
## 181 182 183 184 185 186
## 1.196724048 1.956518579 1.912090011 1.255286332 1.130971418 1.065394456
## 187 188 189 190 191 192
## 1.727824656 1.778100926 1.031369484 1.149997104 0.491384999 1.069531722
## 193 194 195 196 197 198
## 0.444787084 -1.018187850 -0.178106078 -0.726331346 -0.293250606 -1.041588666
## 199 200 201 202 203 204
## -0.457798326 -0.855825310 -0.026271400 0.259325140 -0.338500938 -0.593854473
## 205 206 207 208 209 210
## 0.277584017 -0.637768884 -0.592223728 -0.507831661 -0.184151359 -0.476770541
## 211 212 213 214 215 216
## 1.495071084 1.489762635 1.589931142 1.703909032 2.038482188 1.358891395
## 217 218 219 220 221 222
## 1.092980686 1.366493646 1.066271179 0.981014553 1.302216663 1.159807719
# Reporting the largest and smallest linear-predictor values
max(LR_OS_ADASYN_Model_Indices, na.rm = FALSE)## [1] 3.21878
min(LR_OS_ADASYN_Model_Indices, na.rm = FALSE)## [1] -4.305663
##################################
# Combining the oversampled train set with
# the fitted probabilities (Prob) and the
# classification index on the logit scale (LP)
##################################
LR_OS_ADASYN_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_ADASYN)
LR_OS_ADASYN_Model_Predictions[["LR_OS_ADASYN_Prob"]] <- LR_OS_ADASYN_Model_Probabilities
LR_OS_ADASYN_Model_Predictions[["LR_OS_ADASYN_LP"]] <- LR_OS_ADASYN_Model_Indices
# Ensuring Class is a factor and (re)setting the constant method label
LR_OS_ADASYN_Model_Predictions[["Class"]] <- as.factor(LR_OS_ADASYN_Model_Predictions[["Class"]])
LR_OS_ADASYN_Model_Predictions[["Label"]] <- rep_len(
  "LR_OS_ADASYN",
  nrow(LR_OS_ADASYN_Model_Predictions)
)
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
# Plotting the fitted probability (y) against the logit-scale classification
# index (x): points are coloured by Class and a black line traces the curve.
LR_OS_ADASYN_Model_Predictions %>%
  ggplot(aes(x = LR_OS_ADASYN_LP,
             y = LR_OS_ADASYN_Prob,
             color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  xlab("Sonar Object Classification Index (Logit Values)") +
  ylab("Estimated Rock Detection Probability") +
  labs(color = "Class") +
  scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.1), labels = scales::percent) +
  ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_ADASYN)") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
# NOTE: the narrative below was fused, un-commented, onto the closing line of
# the plot call above, which made the script unparseable. It is kept here as
# comments with its content unchanged.
#
# Oversampling is only performed within minority class instances in the
# danger category which covers those with more than half of the nearest
# neighbors from the majority class.
#
# Logistic Regression models the relationship between the probability of an
# event (among two outcome levels) by having the log-odds of the event be a
# linear combination of a set of predictors weighted by their respective
# parameter estimates. The parameters are estimated via maximum likelihood
# estimation by testing different values through multiple iterations to
# optimize for the best fit of log odds. All of these iterations produce the
# log likelihood function, and logistic regression seeks to maximize this
# function to find the best parameter estimates. Given the optimal parameters,
# the conditional probabilities for each observation can be calculated,
# logged, and summed together to yield a predicted probability.
#
# [A] The class ratio of the original data was noted at 80:20.
#     [A.1] Majority Class = Class=M with 111 instances
#     [A.2] Minority Class = Class=R with 25 instances
#
# [B] The class ratio of the oversampled data was noted at 50:50 with the
#     majority of the added instances being unique values for the minority
#     class.
#     [B.1] Majority Class = Class=M with 111 instances
#     [B.2] Minority Class = Class=R with 111 instances
#
# [C] The logistic regression model from the stats package was implemented.
#     The Class response was regressed against the V1 and V11 predictors.
#
# [D] The logistic curve formulated by plotting the predicted probabilities
#     against the classification index using the logit values showed a
#     sufficiently balanced logistic profile for the predicted points from
#     both the majority and minority classes. Minimal overlap between both
#     classes is observed, driving better differentiation, although a minimal
#     skew is still present due to a longer tail for the predicted points
#     belonging to the majority class.
##################################
# Visualizing the imbalanced data set
##################################
# Scatter plot of V11 against V1 for the train set before oversampling,
# coloured by Class, with both axes fixed to [-4, 4]
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, colour = Class)) +
  geom_point(size = 5) +
  scale_color_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  scale_y_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  ggtitle("Without Oversampling - Borderline Synthetic Minority Oversampling Technique") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"))
##################################
# Implementing OS_BSMOTE
# Visualizing the oversampled data using OS_BSMOTE
##################################
# Preparing the Borderline-SMOTE recipe once. The original code built and
# prepped the identical recipe (same data, same seed) twice, and fused the
# second definition onto the closing line of the plot call, which is a
# syntax error. The prepared recipe is now created first and reused.
OS_BSMOTE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_bsmote(Class, seed = 123456789) %>%
  prep()
# Scatter plot of V11 against V1 for the oversampled training data baked
# from the prepared recipe, coloured by Class, with both axes fixed to [-4, 4]
OS_BSMOTE %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Oversampling - Borderline Synthetic Minority Oversampling Technique") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
# Extracting the oversampled training data from the prepared recipe as a
# plain data frame; the outer parentheses print the result.
(PMA_PreModelling_Train_LR_OS_BSMOTE <- as.data.frame(
  bake(OS_BSMOTE, new_data = NULL)
))## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.408807916 -1.63079048 M
## 11 0.738227771 -1.15770286 M
## 12 1.169078303 -0.79246144 M
## 13 0.118911997 -1.30982705 M
## 14 -0.174409184 -1.31877657 M
## 15 1.012209983 -0.76338471 M
## 16 0.813818599 0.26605270 M
## 17 0.207991857 -0.39104999 M
## 18 -0.957451233 0.02573746 M
## 19 0.695599819 -0.11827986 M
## 20 -1.204708453 -0.18639848 M
## 21 -0.068999246 0.13149291 M
## 22 0.527094931 -0.39650480 M
## 23 0.104263091 0.33286517 M
## 24 0.465616266 -0.56497212 M
## 25 -0.507041623 -0.24127887 M
## 26 0.043933590 0.30731955 M
## 27 0.147716923 -0.20173692 M
## 28 0.612463710 0.84401925 M
## 29 -0.068999246 0.63930609 M
## 30 1.395977603 1.55085071 M
## 31 -0.180527407 1.02812486 M
## 32 0.565359423 0.74012531 M
## 33 2.230423635 1.21329169 M
## 34 0.782380194 0.93715691 M
## 35 2.005116912 0.95072168 M
## 36 1.784287809 0.74337479 M
## 37 1.523834205 1.62207084 M
## 38 1.928176420 1.77732114 M
## 39 -1.204708453 0.65264083 M
## 40 1.934080462 1.47897609 M
## 41 0.744203377 2.70773324 M
## 42 1.424344244 -0.63319889 M
## 43 -0.491314245 -0.58236264 M
## 44 0.689392711 -0.83030322 M
## 45 1.381555338 -0.09258957 M
## 46 1.002499594 0.07158504 M
## 47 0.977896818 0.15071601 M
## 48 0.278484686 -2.26945660 M
## 49 1.406688795 0.06372536 M
## 50 1.852613244 0.04399440 M
## 51 1.281675939 -0.27257795 M
## 52 1.390588252 0.33359231 M
## 53 -0.192852168 -0.57752146 M
## 54 -0.180527407 -0.62433567 M
## 55 -0.052243905 -0.17198030 M
## 56 -0.779287302 -0.12994977 M
## 57 -0.041194136 -1.17696945 M
## 58 -0.924156756 -1.24933498 M
## 59 -0.168320027 -1.80173334 M
## 60 -2.093160440 0.59563328 M
## 61 -0.230553865 -0.22058604 M
## 62 -1.038676203 0.48516249 M
## 63 -0.030238811 0.37335493 M
## 64 0.089444831 0.04161877 M
## 65 0.303951059 -0.21715050 M
## 66 -0.132380328 -0.09011428 M
## 67 -1.546463816 0.01457488 M
## 68 -0.499153812 0.87301303 M
## 69 -0.108968439 0.07550792 M
## 70 0.686277848 -1.02337267 M
## 71 -0.721885235 -1.35101476 M
## 72 -3.557061230 -0.28132844 M
## 73 -0.789109306 -0.36302243 M
## 74 -0.750264961 -0.03937819 M
## 75 -0.379134945 2.23365699 M
## 76 -0.371994173 0.60103638 M
## 77 0.401039618 0.45156422 M
## 78 -0.295964244 1.50754826 M
## 79 0.256869245 1.19827720 M
## 80 1.243886483 1.44694263 M
## 81 -0.230553865 0.36255507 M
## 82 -0.256318859 -0.63813480 M
## 83 0.632133128 0.46700244 M
## 84 0.324793230 1.73570326 M
## 85 0.723165726 0.69367752 M
## 86 -1.191144673 1.19711994 M
## 87 -1.177722925 1.07730973 M
## 88 0.142960831 2.42255907 M
## 89 0.443194144 1.74741590 M
## 90 -0.180527407 0.75375436 M
## 91 0.544629613 0.36615870 M
## 92 -1.274767709 0.41409622 M
## 93 -0.539087424 -0.07530230 M
## 94 -0.555415865 0.01776805 M
## 95 0.345301912 -0.27257795 M
## 96 -1.868589923 0.02175517 M
## 97 -0.217863016 -0.21200423 M
## 98 0.625610136 0.17668535 M
## 99 -0.799007156 -0.01833539 M
## 100 -2.013850697 0.05347991 M
## 101 0.537644499 -0.19150300 M
## 102 -0.013979414 0.15531332 M
## 103 -0.935159101 0.53154288 M
## 104 -0.779287302 0.73036007 M
## 105 0.424204681 0.54321732 M
## 106 0.157176488 1.02691760 M
## 107 -0.323092564 0.48794823 M
## 108 0.377448180 0.47190071 M
## 109 0.992715078 0.20396774 M
## 110 0.295518362 0.33867807 M
## 111 0.099342681 0.20698569 M
## 112 -2.902302684 -1.49282840 R
## 113 -0.588710342 -0.46833442 R
## 114 -0.597170869 -1.70542147 R
## 115 -0.289270938 -1.17938825 R
## 116 0.544629613 -1.61762409 R
## 117 0.064360561 0.15607894 R
## 118 -0.168320027 -1.67226688 R
## 119 -0.316256467 -0.26298086 R
## 120 -1.638716948 -1.51501038 R
## 121 0.274190661 0.01776805 R
## 122 -0.860055136 -2.21946189 R
## 123 -0.789109306 -0.84516218 R
## 124 -1.427539411 -1.35491277 R
## 125 0.779483548 -1.33935885 R
## 126 -0.400799099 -2.26338258 R
## 127 -0.302692686 -0.92399268 R
## 128 0.827857790 0.18656255 R
## 129 -0.829167731 0.42261407 R
## 130 -2.434915803 -1.57286451 R
## 131 -1.138280881 -0.92728240 R
## 132 -0.860055136 -1.28443852 R
## 133 -1.125398710 0.66990478 R
## 134 -0.186674974 -1.39954451 R
## 135 -0.008605128 -1.57143555 R
## 136 -1.348878827 -0.41384379 R
## 137 -2.110893443 -0.94312757 R
## 138 -2.045433423 -1.50787056 R
## 139 -2.727906546 -1.49588989 R
## 140 -1.331613148 -0.98926479 R
## 141 -2.884342616 -1.48707040 R
## 142 -1.692543814 -0.65254827 R
## 143 -2.543559671 -1.55426015 R
## 144 -0.200710576 -1.41000300 R
## 145 -0.579994800 -1.75422471 R
## 146 -0.726952717 -1.49758877 R
## 147 -0.427587806 -2.18726646 R
## 148 -0.234409462 -1.43511339 R
## 149 -0.638609937 -1.78645087 R
## 150 -0.424385927 -2.19636412 R
## 151 -0.539891272 -1.86817291 R
## 152 -0.209046173 -1.77581970 R
## 153 -0.198503193 -1.74901247 R
## 154 -0.085576490 -1.62002917 R
## 155 -0.158312844 -1.66594914 R
## 156 -0.255554034 -1.31678566 R
## 157 -0.101553943 -1.63011607 R
## 158 -0.175951690 -1.55887373 R
## 159 -1.536305519 -1.43737038 R
## 160 -1.482954163 -1.39692372 R
## 161 -1.569819776 -1.57734131 R
## 162 -2.179855466 -1.55433108 R
## 163 -1.554224066 -1.59145067 R
## 164 -1.806165345 -1.52717767 R
## 165 -1.620621314 -1.53138141 R
## 166 -0.960871406 -2.12825390 R
## 167 -0.769103007 -2.22816004 R
## 168 -0.703740137 -2.09580937 R
## 169 -1.116503328 -1.82876902 R
## 170 -0.405250123 -1.85968973 R
## 171 -0.708435397 -2.09952354 R
## 172 -0.659278473 -1.82686585 R
## 173 -0.626623939 -2.03480683 R
## 174 -0.336673771 -1.14769148 R
## 175 -0.711430840 -0.69909654 R
## 176 -0.754841345 -0.85071577 R
## 177 -0.842929035 -0.85781982 R
## 178 -0.495358659 -0.89276851 R
## 179 -0.831157170 -1.10551056 R
## 180 -0.652182669 -0.58768701 R
## 181 -1.157096603 -1.32132725 R
## 182 -1.122671198 -1.31705206 R
## 183 -1.243061435 -1.08218655 R
## 184 -0.939653544 -1.29432362 R
## 185 -1.367768189 -1.34748995 R
## 186 -1.542847586 -1.44233004 R
## 187 -1.000539548 -1.01397735 R
## 188 -0.773256346 -2.22776284 R
## 189 -0.226926303 -1.82128283 R
## 190 -0.350132468 -2.05897887 R
## 191 -0.691041322 -2.23562543 R
## 192 -0.250215562 -1.88049954 R
## 193 -0.065672120 -1.67211873 R
## 194 -0.090517759 -1.71595384 R
## 195 -1.736178623 -1.42168866 R
## 196 -2.664395207 -1.53356807 R
## 197 -1.663602473 -1.51681863 R
## 198 -2.158723504 -1.55279557 R
## 199 -2.306693552 -1.43602548 R
## 200 -1.551549377 -0.63013419 R
## 201 -2.133114929 -1.25078205 R
## 202 -1.307953653 -1.17812109 R
## 203 -0.959054286 -1.15735412 R
## 204 -0.801121364 -0.84798724 R
## 205 -1.361056552 -1.25662668 R
## 206 -0.979418863 -0.88992030 R
## 207 -1.031861506 -0.83841126 R
## 208 -1.114796484 -0.95742913 R
## 209 -0.732133486 -1.26089511 R
## 210 -0.999679111 -1.30177802 R
## 211 -0.858361528 -1.27395218 R
## 212 -1.042843755 -1.04979426 R
## 213 -0.827023587 -1.07991659 R
## 214 -0.811668799 -0.98484412 R
## 215 -1.087905903 -0.99194835 R
## 216 -0.185382331 -1.41875093 R
## 217 -0.417948424 -1.57187563 R
## 218 -0.079660886 -1.50284534 R
## 219 -0.022012673 -1.55849323 R
## 220 -0.284049367 -1.19059300 R
## 221 -0.174278835 -1.58372942 R
## 222 -0.277331844 -1.02794573 R
# Tagging every row of the OS_BSMOTE oversampled train set
# with a constant method label
PMA_PreModelling_Train_LR_OS_BSMOTE[["Label"]] <- rep_len(
  "LR_OS_BSMOTE",
  nrow(PMA_PreModelling_Train_LR_OS_BSMOTE)
)
##################################
# Counting the rows per Class level
# in the data oversampled using OS_BSMOTE
##################################
table(PMA_PreModelling_Train_LR_OS_BSMOTE[["Class"]]) ##
## M R
## 111 111
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
# Fitting a binomial GLM (logistic regression) of Class on V1 and V11
# using the OS_BSMOTE oversampled train set.
# NOTE(review): with a factor response glm() treats the first level as
# "failure", so fitted probabilities refer to the other level
# (presumably R, given levels M/R - confirm the level order).
LR_OS_BSMOTE_Model <- glm(
  formula = Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_OS_BSMOTE
)
##################################
# Summarizing the fitted model
# (coefficients, deviances and AIC)
##################################
summary(LR_OS_BSMOTE_Model)##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_BSMOTE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.51118 -0.31186 0.04469 0.46486 2.82180
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3748 0.4136 -5.742 9.36e-09 ***
## V1 -1.3384 0.3235 -4.138 3.50e-05 ***
## V11 -2.5707 0.3536 -7.271 3.58e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 126.59 on 219 degrees of freedom
## AIC: 132.59
##
## Number of Fisher Scoring iterations: 6
# Collecting the fitted coefficients into a three-column data frame:
# the estimate, the coefficient name and a constant model label
LR_OS_BSMOTE_Model_Coef <- as.data.frame(coef(LR_OS_BSMOTE_Model))
LR_OS_BSMOTE_Model_Coef$Coef <- rownames(LR_OS_BSMOTE_Model_Coef)
LR_OS_BSMOTE_Model_Coef$Model <- rep("LR_OS_BSMOTE", nrow(LR_OS_BSMOTE_Model_Coef))
colnames(LR_OS_BSMOTE_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() takes `row.names`, not `rownames`; the misspelled
# argument was silently ignored, so the row names were printed next to the
# identical Coefficients column. The echoed output that follows was captured
# before this fix and still shows the row-name column.
print(LR_OS_BSMOTE_Model_Coef, row.names = FALSE)## Estimates Coefficients Model
## (Intercept) -2.374787 (Intercept) LR_OS_BSMOTE
## V1 -1.338444 V1 LR_OS_BSMOTE
## V11 -2.570724 V11 LR_OS_BSMOTE
##################################
# Computing the model predictions
##################################
# Predicting on the response scale (probabilities); no newdata is supplied,
# so the values are the fitted ones for the rows the model was trained on.
# The outer parentheses print the assigned vector.
(LR_OS_BSMOTE_Model_Probabilities <- predict(
  object = LR_OS_BSMOTE_Model,
  type = "response"
))## 1 2 3 4 5 6
## 6.006859e-01 2.079892e-03 1.628557e-01 1.505806e-04 4.074472e-04 4.789811e-03
## 7 8 9 10 11 12
## 6.307140e-02 4.806695e-03 2.283076e-01 7.808042e-01 4.045050e-01 1.298446e-01
## 13 14 15 16 17 18
## 6.970438e-01 7.771026e-01 1.459006e-01 1.555055e-02 1.613942e-01 2.387661e-01
## 19 20 21 22 23 24
## 4.734715e-02 4.296759e-01 6.783271e-02 1.129486e-01 3.324495e-02 1.757225e-01
## 25 26 27 28 29 30
## 2.542867e-01 3.828633e-02 1.136601e-01 4.659037e-03 1.934256e-02 2.664560e-04
## 31 32 33 34 35 36
## 8.357633e-03 6.469618e-03 2.077176e-04 2.926166e-03 5.513619e-04 1.261781e-03
## 37 38 39 40 41 42
## 1.869932e-04 7.303152e-05 8.016464e-02 1.560020e-04 3.258266e-05 6.577898e-02
## 43 44 45 46 47 48
## 4.451968e-01 2.381189e-01 1.823696e-02 1.982859e-02 1.677255e-02 9.563376e-01
## 49 50 51 52 53 54
## 1.187490e-02 6.912386e-03 3.262472e-02 6.098557e-03 3.470533e-01 3.709446e-01
## 55 56 57 58 59 60
## 1.343831e-01 2.693959e-01 6.695195e-01 8.883345e-01 9.228887e-01 2.489130e-01
## 61 62 63 64 65 66
## 1.825541e-01 9.693127e-02 3.577391e-02 6.904229e-02 9.767097e-02 1.228251e-01
## 67 68 69 70 71 72
## 4.152266e-01 1.887292e-02 8.143187e-02 3.401731e-01 8.874048e-01 9.572771e-01
## 73 74 75 76 77 78
## 4.048304e-01 2.193652e-01 4.954973e-04 3.161516e-02 1.675101e-02 2.859779e-03
## 79 80 81 82 83 84
## 3.021328e-03 4.265516e-04 4.750576e-02 4.034164e-01 1.187473e-02 6.945722e-04
## 85 86 87 88 89 90
## 5.905442e-03 2.067431e-02 2.743980e-02 1.516378e-04 5.752660e-04 1.677652e-02
## 91 92 93 94 95 96
## 1.720777e-02 1.501903e-01 1.885182e-01 1.574837e-01 1.056248e-01 5.175650e-01
## 97 98 99 100 101 102
## 1.767992e-01 2.493234e-02 2.212795e-01 5.456617e-01 6.900384e-02 5.978552e-02
## 103 104 105 106 107 108
## 7.659193e-02 3.881689e-02 1.288117e-02 5.351133e-03 3.928910e-02 1.641340e-02
## 109 110 111 112 113 114
## 1.437443e-02 2.555662e-02 4.565748e-02 9.952622e-01 4.054342e-01 9.431421e-01
## 115 116 117 118 119 120
## 7.396726e-01 7.416806e-01 5.405582e-02 8.956161e-01 2.183267e-01 9.761822e-01
## 121 122 123 124 125 126
## 5.800597e-02 9.888154e-01 7.014203e-01 9.534252e-01 5.062597e-01 9.816606e-01
## 127 128 129 130 131 132
## 6.000517e-01 1.866196e-02 8.695320e-02 9.928083e-01 8.223755e-01 8.887747e-01
## 133 134 135 136 137 138
## 6.974294e-02 8.134993e-01 8.424349e-01 6.211635e-01 9.465990e-01 9.857863e-01
## 139 140 141 142 143 144
## 9.940706e-01 8.755153e-01 9.950755e-01 8.275139e-01 9.934726e-01 8.203295e-01
## 145 146 147 148 149 150
## 9.483940e-01 9.204214e-01 9.785506e-01 8.359281e-01 9.557410e-01 9.789479e-01
## 151 152 153 154 155 156
## 9.589239e-01 9.220227e-01 9.158404e-01 8.703888e-01 8.928129e-01 7.945136e-01
## 157 158 159 160 161 162
## 8.756336e-01 8.662439e-01 9.669637e-01 9.608783e-01 9.777136e-01 9.894245e-01
## 163 164 165 166 167 168
## 9.780466e-01 9.814510e-01 9.765941e-01 9.876594e-01 9.876613e-01 9.811992e-01
## 169 170 171 172 173 174
## 9.785606e-01 9.501862e-01 9.814891e-01 9.609841e-01 9.757512e-01 7.361844e-01
## 175 176 177 178 179 180
## 5.925718e-01 6.947628e-01 7.228503e-01 6.418228e-01 8.291525e-01 5.022260e-01
## 181 182 183 184 185 186
## 9.289512e-01 9.250913e-01 8.880508e-01 9.011649e-01 9.488224e-01 9.676439e-01
## 187 188 189 190 191 192
## 8.279311e-01 9.877165e-01 9.315634e-01 9.672930e-01 9.865775e-01 9.423702e-01
## 193 194 195 196 197 198
## 8.820166e-01 8.963778e-01 9.735002e-01 9.941401e-01 9.770490e-01 9.890819e-01
## 199 200 201 202 203 204
## 9.879220e-01 7.894770e-01 9.757711e-01 9.171668e-01 8.680804e-01 7.062853e-01
## 205 206 207 208 209 210
## 9.356769e-01 7.727403e-01 7.616280e-01 8.290050e-01 8.637211e-01 9.096805e-01
## 211 212 213 214 215 216
## 8.858527e-01 8.480944e-01 8.188099e-01 7.761464e-01 8.363531e-01 8.206201e-01
## 217 218 219 220 221 222
## 9.025137e-01 8.313514e-01 8.403899e-01 7.438513e-01 8.732280e-01 6.545208e-01
##################################
# Creating a classification index
# based from the model predictions
##################################
# Predicting on the link scale (log-odds / linear predictor) for the
# training rows; the outer parentheses print the assigned vector.
(LR_OS_BSMOTE_Model_Indices <- predict(
  object = LR_OS_BSMOTE_Model,
  type = "link"
))## 1 2 3 4 5
## 0.408324003 -6.173357428 -1.637131633 -8.800861401 -7.805191778
## 6 7 8 9 10
## -5.336462967 -2.698339590 -5.332927345 -1.217892324 1.270358909
## 11 12 13 14 15
## -0.386729015 -1.902333371 0.833259849 1.248860586 -1.767122159
## 16 17 18 19 20
## -4.147986246 -1.647891134 -1.159456434 -3.001743931 -0.283173443
## 21 22 23 24 25
## -2.620467808 -2.060969930 -3.370042340 -1.545601145 -1.075879089
## 26 27 28 29 30
## -3.223623920 -2.053888243 -5.364276686 -3.925915527 -8.230035045
## 31 32 33 34 35
## -4.776187215 -5.034147554 -8.479123183 -5.831131646 -7.502567613
## 36 37 38 39 40
## -6.673968501 -8.584251291 -9.524546462 -2.440112242 -8.765485802
## 41 42 43 44 45
## -10.331697835 -2.653412674 -0.220096954 -1.163020255 -3.985899663
## 46 47 48 49 50
## -3.900602432 -4.071096916 3.086623816 -4.421382008 -4.967504071
## 51 52 53 54 55
## -3.389516181 -5.093585877 -0.632017084 -0.528166665 -1.862747933
## 56 57 58 59 60
## -0.997689924 0.706012696 2.073840593 2.482259349 -1.104418259
## 61 62 63 64 65
## -1.499138052 -2.231796468 -3.294107073 -2.601494709 -2.223374824
## 66 67 68 69 70
## -1.965944774 -0.342400130 -3.950973850 -2.423049316 -0.662522844
## 71 72 73 74 75
## 2.064502155 3.109357894 -0.385378145 -1.269369274 -7.609452965
## 76 77 78 79 80
## -3.421992894 -4.072403649 -5.854146889 -5.799032924 -7.759350558
## 81 82 83 84 85
## -2.998233112 -0.391250280 -4.421396791 -7.271519668 -5.125957993
## 86 87 88 89 90
## -3.857972335 -3.567937550 -8.793864147 -7.460102633 -4.070856278
## 91 92 93 94 95
## -4.045036769 -1.733109397 -1.459667592 -1.677071125 -2.136231934
## 96 97 98 99 100
## 0.070289110 -1.538185522 -3.666340925 -1.258225807 0.183156955
## 101 102 103 104 105
## -2.602093063 -2.755344499 -2.489579517 -3.209309392 -4.339023651
## 106 107 108 109 110
## -5.225081444 -3.196726510 -4.093107340 -4.227825845 -3.640970185
## 111 112 113 114 115
## -3.039855195 5.347432700 -0.382872854 2.808660917 1.044267677
## 116 117 118 119 120
## 1.054722007 -2.862166349 2.149436749 -1.275444526 3.713217647
## 121 122 123 124 125
## -2.787453045 4.481973033 0.854070247 3.019001507 0.025039910
## 126 127 128 129 130
## 3.980192518 0.405680323 -3.962429636 -2.351417088 4.927612286
## 131 132 133 134 135
## 1.532525319 2.078285669 -2.590644755 1.472909778 1.676457742
## 136 137 138 139 140
## 0.494489753 2.875046379 4.239230380 5.121883467 1.950629387
## 141 142 143 144 145
## 5.308591914 1.568109532 5.025199351 1.518581542 2.911131389
## 146 147 148 149 150
## 2.448086088 3.820374146 1.628237508 3.072429056 3.839476193
## 151 152 153 154 155
## 3.150384469 2.470152182 2.387126982 1.904400400 2.119801522
## 156 157 158 159 160
## 1.352350344 1.951715959 1.868148771 3.376554627 3.201169629
## 161 162 163 164 165
## 3.781238284 4.538583963 3.796635579 3.968616703 3.731083062
## 166 167 168 169 170
## 4.382439385 4.382599258 3.954877615 3.820850921 2.948366914
## 171 172 173 174 175
## 3.970710070 3.203988514 3.694841183 1.026230052 0.374607475
## 176 177 178 179 180
## 0.822481260 0.958644246 0.583284207 1.579632898 0.008903908
## 181 182 183 184 185
## 2.570689819 2.513622999 2.070984108 2.210235552 2.919919051
## 186 187 188 189 190
## 3.398060745 1.571035089 4.387137173 2.610956908 3.386912469
## 191 192 193 194 195
## 4.297309508 2.794358127 2.011667419 2.157609899 3.603760225
## 196 197 198 199 200
## 5.133737324 3.751174055 4.506352641 4.404218553 1.321775934
## 201 202 203 204 205
## 3.695683464 2.404459967 1.884091524 0.877410183 2.677351471
## 206 207 208 209 210
## 1.223849729 1.161625750 1.578591698 1.846546094 2.309739633
## 211 212 213 214 215
## 2.049061392 1.719732291 1.508305278 1.243348656 1.631339559
## 216 217 218 219 220
## 1.520554047 2.225472173 1.595235372 1.661131855 1.066083204
## 221 222
## 1.929806883 0.638970888
# Extremes of the classification index
max(LR_OS_BSMOTE_Model_Indices)
## [1] 5.347433
min(LR_OS_BSMOTE_Model_Indices)
## [1] -10.3317
##################################
# Combining the modelling data with
# the model probabilities and
# the classification index
##################################
LR_OS_BSMOTE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_BSMOTE)
# Make sure the outcome is a factor so it can drive the colour scale
LR_OS_BSMOTE_Model_Predictions$Class <- as.factor(LR_OS_BSMOTE_Model_Predictions$Class)
# Attach the predicted probability and the linear predictor side by side
LR_OS_BSMOTE_Model_Predictions$LR_OS_BSMOTE_Prob <- LR_OS_BSMOTE_Model_Probabilities
LR_OS_BSMOTE_Model_Predictions$LR_OS_BSMOTE_LP <- LR_OS_BSMOTE_Model_Indices
# Tag every row with the name of the model that produced it
LR_OS_BSMOTE_Model_Predictions$Label <- rep(
  "LR_OS_BSMOTE",
  times = nrow(LR_OS_BSMOTE_Model_Predictions)
)
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
# Plots the predicted probability against the classification index
# (linear predictor), coloured by the observed class.
#
# Changes from the earlier version of this chunk:
# - The copy-pasted scale_x/scale_y layers with limits c(-4, 4) were removed.
#   They were replaced by the later scales anyway (ggplot2 keeps only the
#   last scale per aesthetic and prints a "Scale for x is already present"
#   message), and a -4..4 range is meaningless for a probability axis.
# - The x-axis limits were widened from c(-10, 5) to c(-11, 6). The index
#   for this model ranges from -10.3317 to 5.347433 (see max/min above), so
#   the narrower limits pushed the extreme observations out of bounds and
#   ggplot2 dropped them from the plot with a warning.
LR_OS_BSMOTE_Model_Predictions %>%
  ggplot(aes(x = LR_OS_BSMOTE_LP,
             y = LR_OS_BSMOTE_Prob,
             color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  geom_line(color = "black") +
  xlab("Sonar Object Classification Index (Logit Values)") +
  ylab("Estimated Rock Detection Probability") +
  labs(color = "Class") +
  scale_x_continuous(limits = c(-11, 6), breaks = seq(-11, 6, by = 1)) +
  scale_y_continuous(limits = c(0, 1),
                     breaks = seq(0, 1, by = 0.1),
                     labels = scales::percent) +
  ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_BSMOTE)") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")
##################################
# Visualizing the imbalanced data set
##################################
# Scatter of V1 against V11 for the data before oversampling,
# coloured by class, on fixed -4..4 axes.
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, colour = Class)) +
  geom_point(size = 5) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  scale_y_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  ggtitle("Without Oversampling - Synthetic Minority Oversampling Technique") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"))
##################################
# Implementing OS_SMOTE
# Visualizing the oversampled data using OS_SMOTE
##################################
# Apply the SMOTE step to the data and plot the result on the same
# -4..4 axes as the "without oversampling" scatter.
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_smote(Class, seed = 123456789) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Oversampling - Synthetic Minority Oversampling Technique") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")

# The plot expression and the assignment below used to share one line
# ("...legend.position="top")OS_SMOTE <- recipe(..."), which R cannot parse.
# They are now separate statements.
#
# Prepare the same recipe (same formula, same seed) and keep it, then bake
# it to obtain the oversampled data used for modelling.
OS_SMOTE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_smote(Class, seed = 123456789) %>%
  prep()
PMA_PreModelling_Train_LR_OS_SMOTE <- OS_SMOTE %>%
  bake(new_data = NULL)
# Convert to a plain data frame; the outer parentheses print the result
(PMA_PreModelling_Train_LR_OS_SMOTE <- as.data.frame(PMA_PreModelling_Train_LR_OS_SMOTE))
## V1 V11 Class
## 1 0.914240973 -1.558615871 M
## 2 2.175018299 0.345206089 M
## 3 -0.230553865 -0.166907171 M
## 4 1.231717734 1.858421213 M
## 5 0.424204681 1.891540835 M
## 6 1.143139503 0.556904232 M
## 7 -0.507041623 0.389850825 M
## 8 0.312328639 0.988089406 M
## 9 -0.935159101 0.036862390 M
## 10 0.408807916 -1.630790485 M
## 11 0.738227771 -1.157702863 M
## 12 1.169078303 -0.792461442 M
## 13 0.118911997 -1.309827049 M
## 14 -0.174409184 -1.318776567 M
## 15 1.012209983 -0.763384712 M
## 16 0.813818599 0.266052702 M
## 17 0.207991857 -0.391049992 M
## 18 -0.957451233 0.025737464 M
## 19 0.695599819 -0.118279862 M
## 20 -1.204708453 -0.186398477 M
## 21 -0.068999246 0.131492907 M
## 22 0.527094931 -0.396504803 M
## 23 0.104263091 0.332865171 M
## 24 0.465616266 -0.564972117 M
## 25 -0.507041623 -0.241278874 M
## 26 0.043933590 0.307319548 M
## 27 0.147716923 -0.201736917 M
## 28 0.612463710 0.844019251 M
## 29 -0.068999246 0.639306087 M
## 30 1.395977603 1.550850706 M
## 31 -0.180527407 1.028124858 M
## 32 0.565359423 0.740125313 M
## 33 2.230423635 1.213291690 M
## 34 0.782380194 0.937156911 M
## 35 2.005116912 0.950721679 M
## 36 1.784287809 0.743374785 M
## 37 1.523834205 1.622070841 M
## 38 1.928176420 1.777321144 M
## 39 -1.204708453 0.652640830 M
## 40 1.934080462 1.478976095 M
## 41 0.744203377 2.707733238 M
## 42 1.424344244 -0.633198890 M
## 43 -0.491314245 -0.582362640 M
## 44 0.689392711 -0.830303223 M
## 45 1.381555338 -0.092589575 M
## 46 1.002499594 0.071585037 M
## 47 0.977896818 0.150716010 M
## 48 0.278484686 -2.269456602 M
## 49 1.406688795 0.063725362 M
## 50 1.852613244 0.043994401 M
## 51 1.281675939 -0.272577947 M
## 52 1.390588252 0.333592312 M
## 53 -0.192852168 -0.577521460 M
## 54 -0.180527407 -0.624335667 M
## 55 -0.052243905 -0.171980296 M
## 56 -0.779287302 -0.129949769 M
## 57 -0.041194136 -1.176969454 M
## 58 -0.924156756 -1.249334984 M
## 59 -0.168320027 -1.801733345 M
## 60 -2.093160440 0.595633277 M
## 61 -0.230553865 -0.220586042 M
## 62 -1.038676203 0.485162490 M
## 63 -0.030238811 0.373354928 M
## 64 0.089444831 0.041618772 M
## 65 0.303951059 -0.217150498 M
## 66 -0.132380328 -0.090114281 M
## 67 -1.546463816 0.014574884 M
## 68 -0.499153812 0.873013033 M
## 69 -0.108968439 0.075507923 M
## 70 0.686277848 -1.023372673 M
## 71 -0.721885235 -1.351014760 M
## 72 -3.557061230 -0.281328435 M
## 73 -0.789109306 -0.363022432 M
## 74 -0.750264961 -0.039378188 M
## 75 -0.379134945 2.233656987 M
## 76 -0.371994173 0.601036378 M
## 77 0.401039618 0.451564218 M
## 78 -0.295964244 1.507548259 M
## 79 0.256869245 1.198277196 M
## 80 1.243886483 1.446942626 M
## 81 -0.230553865 0.362555066 M
## 82 -0.256318859 -0.638134800 M
## 83 0.632133128 0.467002438 M
## 84 0.324793230 1.735703263 M
## 85 0.723165726 0.693677522 M
## 86 -1.191144673 1.197119945 M
## 87 -1.177722925 1.077309728 M
## 88 0.142960831 2.422559073 M
## 89 0.443194144 1.747415902 M
## 90 -0.180527407 0.753754356 M
## 91 0.544629613 0.366158697 M
## 92 -1.274767709 0.414096217 M
## 93 -0.539087424 -0.075302304 M
## 94 -0.555415865 0.017768054 M
## 95 0.345301912 -0.272577947 M
## 96 -1.868589923 0.021755168 M
## 97 -0.217863016 -0.212004228 M
## 98 0.625610136 0.176685353 M
## 99 -0.799007156 -0.018335390 M
## 100 -2.013850697 0.053479912 M
## 101 0.537644499 -0.191503002 M
## 102 -0.013979414 0.155313323 M
## 103 -0.935159101 0.531542880 M
## 104 -0.779287302 0.730360068 M
## 105 0.424204681 0.543217319 M
## 106 0.157176488 1.026917595 M
## 107 -0.323092564 0.487948233 M
## 108 0.377448180 0.471900708 M
## 109 0.992715078 0.203967739 M
## 110 0.295518362 0.338678072 M
## 111 0.099342681 0.206985690 M
## 112 -2.902302684 -1.492828399 R
## 113 -0.588710342 -0.468334419 R
## 114 -0.597170869 -1.705421467 R
## 115 -0.289270938 -1.179388252 R
## 116 0.544629613 -1.617624095 R
## 117 0.064360561 0.156078935 R
## 118 -0.168320027 -1.672266879 R
## 119 -0.316256467 -0.262980859 R
## 120 -1.638716948 -1.515010380 R
## 121 0.274190661 0.017768054 R
## 122 -0.860055136 -2.219461886 R
## 123 -0.789109306 -0.845162176 R
## 124 -1.427539411 -1.354912772 R
## 125 0.779483548 -1.339358851 R
## 126 -0.400799099 -2.263382579 R
## 127 -0.302692686 -0.923992684 R
## 128 0.827857790 0.186562547 R
## 129 -0.829167731 0.422614069 R
## 130 -2.434915803 -1.572864509 R
## 131 -1.138280881 -0.927282397 R
## 132 -0.860055136 -1.284438518 R
## 133 -1.125398710 0.669904780 R
## 134 -0.186674974 -1.399544510 R
## 135 -0.008605128 -1.571435552 R
## 136 -1.348878827 -0.413843793 R
## 137 -1.599032158 -1.370950282 R
## 138 -2.657432813 -1.322745508 R
## 139 -1.790490183 -1.512346033 R
## 140 -0.810443562 -0.452440072 R
## 141 -0.538605201 -0.548157539 R
## 142 -0.660062049 -0.527920440 R
## 143 -0.982531013 -0.440104448 R
## 144 -0.584866894 -1.740381378 R
## 145 -0.455540713 -2.107842443 R
## 146 -0.515081007 -1.938667573 R
## 147 -0.418862256 -1.691636374 R
## 148 -0.239516596 -1.286153950 R
## 149 -0.200058557 -1.542931405 R
## 150 -0.280193092 -1.198868010 R
## 151 0.168622255 -1.586231937 R
## 152 0.570113701 -1.587429430 R
## 153 0.594021690 -1.559102196 R
## 154 0.043595498 0.162273052 R
## 155 0.729004935 0.182615719 R
## 156 -0.112595852 0.208864171 R
## 157 -0.265229694 -1.679758994 R
## 158 -0.273378159 -1.939394197 R
## 159 -0.321171380 -2.060916205 R
## 160 -0.133312506 -1.650165905 R
## 161 0.134694842 -0.048560153 R
## 162 -0.475824201 -0.459442307 R
## 163 0.218194825 -0.008857141 R
## 164 -1.947338832 -1.537435745 R
## 165 -1.603461320 -1.473605052 R
## 166 -1.761439442 -1.523927754 R
## 167 -0.074193522 -0.550968323 R
## 168 0.602335677 0.117808437 R
## 169 -0.315403865 -0.314371272 R
## 170 -0.641190054 -2.240392925 R
## 171 -0.860055136 -1.717141529 R
## 172 -0.418893061 -1.870481935 R
## 173 -0.634044436 -1.777523552 R
## 174 -0.796581793 -0.891429685 R
## 175 -0.827847489 -1.085017958 R
## 176 -0.655076888 -0.593129261 R
## 177 -0.846646467 -1.201415865 R
## 178 -1.003113777 -1.302204563 R
## 179 -1.477508813 -1.392795503 R
## 180 -1.024062139 -1.032758823 R
## 181 -1.564325317 -1.458612705 R
## 182 -0.046092087 -1.390787072 R
## 183 -0.066053607 -1.212799303 R
## 184 0.616583983 -1.532369401 R
## 185 -0.286655632 -1.973154324 R
## 186 -0.020863313 -1.593062643 R
## 187 -0.264751485 -2.023354050 R
## 188 -0.360121149 -0.914685611 R
## 189 -0.291775634 -1.131727665 R
## 190 -0.270452304 -1.056144682 R
## 191 -0.234639216 -1.202941057 R
## 192 -0.084662664 -0.235306944 R
## 193 -0.539978141 -0.445804920 R
## 194 -0.263127896 -0.317813628 R
## 195 -0.719767227 0.017260698 R
## 196 -0.729847094 0.289854799 R
## 197 -0.854171908 0.443487311 R
## 198 -2.209147026 -1.524018122 R
## 199 -2.549911790 -1.553172404 R
## 200 -2.408990100 -1.545196549 R
## 201 -0.955492262 -1.161926657 R
## 202 -0.882407262 -0.713601583 R
## 203 -1.281912481 -0.577108003 R
## 204 -0.852329589 -0.860030693 R
## 205 -0.879649125 -1.259285873 R
## 206 -0.820084244 -1.036950160 R
## 207 -0.817419038 -1.020447965 R
## 208 -1.332052173 -0.332244311 R
## 209 -0.357437358 -0.215502108 R
## 210 -0.321886669 0.322888990 R
## 211 -1.300027287 -0.176942239 R
## 212 -0.158828325 -1.426424914 R
## 213 -0.255897661 -1.251002529 R
## 214 -0.177300888 -1.538827013 R
## 215 -0.255677280 -1.116706900 R
## 216 -0.162403256 -1.606447414 R
## 217 -0.161098689 -1.424233330 R
## 218 -0.099380030 -1.483810427 R
## 219 -1.358596322 -0.530100664 R
## 220 -0.896995634 0.313447296 R
## 221 -1.278607918 -0.418880974 R
## 222 -1.404025395 -1.073598925 R
# Tag every row of the oversampled data with the model label
PMA_PreModelling_Train_LR_OS_SMOTE$Label <- rep(
  "LR_OS_SMOTE",
  times = nrow(PMA_PreModelling_Train_LR_OS_SMOTE)
)
##################################
# Checking the class distribution
# of the data oversampled with OS_SMOTE
##################################
table(PMA_PreModelling_Train_LR_OS_SMOTE[["Class"]])
##
## M R
## 111 111
##################################
# Fitting the Logistic Regression model
# on the data oversampled with OS_SMOTE
##################################
# Binomial glm of Class on the two predictors V1 and V11
LR_OS_SMOTE_Model <- glm(formula = Class ~ V1 + V11,
                         family = binomial,
                         data = PMA_PreModelling_Train_LR_OS_SMOTE)
##################################
# Reporting the model results
##################################
summary(LR_OS_SMOTE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_SMOTE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.37841 -0.64478 0.05535 0.65629 2.12157
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0415 0.2324 -4.481 7.43e-06 ***
## V1 -0.9329 0.2481 -3.760 0.00017 ***
## V11 -1.7443 0.2506 -6.961 3.38e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 189.34 on 219 degrees of freedom
## AIC: 195.34
##
## Number of Fisher Scoring iterations: 5
# Tabulate the fitted coefficients: one row per model term, with the
# estimate, the term name and a tag identifying the model.
LR_OS_SMOTE_Model_Coef <- as.data.frame(LR_OS_SMOTE_Model$coefficients)
LR_OS_SMOTE_Model_Coef$Coef <- rownames(LR_OS_SMOTE_Model_Coef)
LR_OS_SMOTE_Model_Coef$Model <- rep("LR_OS_SMOTE", nrow(LR_OS_SMOTE_Model_Coef))
colnames(LR_OS_SMOTE_Model_Coef) <- c("Estimates", "Coefficients", "Model")
# print.data.frame() spells the argument `row.names`; the former
# `rownames=FALSE` was silently ignored, so the term names were printed
# twice (once as row names, once in the Coefficients column).
print(LR_OS_SMOTE_Model_Coef, row.names = FALSE)
# NOTE: the output recorded below was captured with the ineffective
# `rownames=FALSE` argument and therefore still shows the row-name column;
# re-run to refresh it.
## Estimates Coefficients Model
## (Intercept) -1.0415029 (Intercept) LR_OS_SMOTE
## V1 -0.9328753 V1 LR_OS_SMOTE
## V11 -1.7442581 V11 LR_OS_SMOTE
##################################
# Computing the model predictions
##################################
# type = "response" returns the fitted values on the probability scale;
# no newdata is supplied, so the data used to fit the model is scored.
LR_OS_SMOTE_Model_Probabilities <- predict(LR_OS_SMOTE_Model, type = "response")
print(LR_OS_SMOTE_Model_Probabilities)
## 1 2 3 4 5 6
## 0.695138300 0.024779368 0.369282594 0.004354817 0.008692463 0.043969471
## 7 8 9 10 11 12
## 0.222960465 0.044944135 0.441910102 0.805596919 0.571789299 0.320853998
## 13 14 15 16 17 18
## 0.756252682 0.805571240 0.342037247 0.094083981 0.365066375 0.451846444
## 19 20 21 22 23 24
## 0.184808902 0.600480342 0.230322198 0.301195793 0.151952308 0.379797538
## 25 26 27 28 29 30
## 0.463155988 0.165407411 0.304190208 0.043728465 0.109852360 0.006375734
## 31 32 33 34 35 36
## 0.064984856 0.054172728 0.005280164 0.032108175 0.010248414 0.017939314
## 37 38 39 40 41 42
## 0.005004700 0.002624183 0.258068057 0.004383673 0.001564322 0.219981147
## 43 44 45 46 47 48
## 0.606499788 0.441184242 0.102585458 0.108944043 0.098265568 0.934454823
## 49 50 51 52 53 54
## 0.078354180 0.054862637 0.146577492 0.051143629 0.536373281 0.553768583
## 55 56 57 58 59 60
## 0.333413190 0.478049348 0.740748894 0.880774271 0.905356078 0.468097562
## 61 62 63 64 65 66
## 0.391344086 0.285203257 0.159156007 0.231913539 0.279632581 0.318466956
## 67 68 69 70 71 72
## 0.592843463 0.109231538 0.255107619 0.525804921 0.879578471 0.940894174
## 73 74 75 76 77 78
## 0.581234332 0.432191970 0.010111774 0.148952395 0.099457455 0.032453398
## 79 80 81 82 83 84
## 0.033206650 0.008786310 0.188648708 0.577051822 0.079747910 0.012468425
## 85 86 87 88 89 90
## 0.050879338 0.117284577 0.139201194 0.004494323 0.010955560 0.100848196
## 91 92 93 94 95 96
## 0.100810641 0.360177402 0.399570525 0.364853162 0.291485499 0.660093722
## 97 98 99 100 101 102
## 0.384977539 0.126385323 0.434344509 0.677845021 0.229873159 0.214271334
## 103 104 105 106 107 108
## 0.250440555 0.169600465 0.084343448 0.048368075 0.169214359 0.098258011
## 109 110 111 112 113 114
## 0.089207115 0.129213957 0.183140465 0.986209131 0.580441619 0.923457474
## 115 116 117 118 119 120
## 0.783385955 0.781081414 0.202008970 0.884155454 0.428550245 0.958104757
## 121 122 123 124 125 126
## 0.209443900 0.974222449 0.762931417 0.934228287 0.638191570 0.963747328
## 127 128 129 130 131 132
## 0.701102490 0.105344361 0.267931877 0.981538824 0.837234144 0.880924439
## 133 134 135 136 137 138
## 0.238643107 0.828325850 0.846513532 0.718833604 0.944876905 0.976904705
## 139 140 141 142 143 144
## 0.963269404 0.623331302 0.602783546 0.621302933 0.655370721 0.926883871
## 145 146 147 148 149 150
## 0.955215764 0.943775731 0.908868243 0.806171172 0.862522074 0.787683378
## 151 152 153 154 155 156
## 0.827497084 0.767730195 0.754697624 0.203393521 0.115056286 0.214034101
## 157 158 159 160 161 162
## 0.894339710 0.930624937 0.945467349 0.876660196 0.253041716 0.550766391
## 163 164 165 166 167 168
## 0.226251030 0.969433469 0.953676457 0.963024518 0.497185366 0.140771687
## 169 170 171 172 173 174
## 0.450431899 0.969659778 0.940249029 0.931621958 0.934038188 0.778421640
## 175 176 177 178 179 180
## 0.835253446 0.646610366 0.863408362 0.897122923 0.940823077 0.847511773
## 181 182 183 184 185 186
## 0.950828270 0.806493833 0.756862021 0.741953895 0.935087595 0.852792408
## 187 188 189 190 191 192
## 0.939045080 0.708868092 0.769362054 0.741347551 0.781704288 0.365380305
## 193 194 195 196 197 198
## 0.559671314 0.439870312 0.401274633 0.296039750 0.265373614 0.975340798
## 199 200 201 202 203 204
## 0.982814940 0.980179251 0.867214606 0.736213169 0.761511998 0.777944845
## 205 206 207 208 209 210
## 0.878213438 0.822334132 0.817719367 0.685821378 0.417713421 0.213421318
## 211 212 213 214 215 216
## 0.617722703 0.831278762 0.798876785 0.859120974 0.758566641 0.871250176
## 217 218 219 220 221 222
## 0.831039532 0.837447101 0.759607794 0.320505259 0.707218982 0.894816419
##################################
# Computing the classification index
# from the fitted model
##################################
# type = "link" returns the linear predictor of the binomial glm
# for the data used to fit the model.
LR_OS_SMOTE_Model_Indices <- predict(LR_OS_SMOTE_Model, type = "link")
print(LR_OS_SMOTE_Model_Indices)
## 1 2 3 4 5 6
## 0.82425259 -3.67265237 -0.53529572 -5.43210828 -4.73656844 -3.07929429
## 7 8 9 10 11 12
## -1.24849676 -3.05634956 -0.23341359 1.42164977 0.28915520 -0.74984995
## 13 14 15 16 17 18
## 1.13224345 1.42148582 -0.65422868 -2.26475881 -0.55344128 -0.19321306
## 19 20 21 22 23 24
## -1.48410023 0.40746693 -1.20649280 -0.84161007 -1.71937016 -0.49040766
## 25 26 27 28 29 30
## -0.14764367 -1.61853210 -0.82742315 -3.08504263 -2.09225005 -5.04885991
## 31 32 33 34 35 36
## -2.66640847 -2.85988236 -5.23850398 -3.40600965 -4.57033103 -4.00265851
## 37 38 39 40 41 42
## -5.29236048 -5.94035796 -1.05603418 -5.42547492 -6.45873755 -1.26577624
## 43 44 45 46 47 48
## 0.43262277 -0.23635725 -2.16882171 -2.10157285 -2.21664637 2.65722365
## 49 50 51 52 53 54
## -2.46492169 -2.84649772 -1.76170050 -2.92061950 0.14575059 0.21590919
## 55 56 57 58 59 60
## -0.69278785 -0.08785908 1.04986458 1.99978279 2.25820667 -0.12778335
## 61 62 63 64 65 66
## -0.44166592 -0.91879612 -1.66452124 -1.19753768 -0.94628485 -0.76082602
## 67 68 69 70 71 72
## 0.37573267 -2.09861470 -1.07155426 0.10331148 1.98844445 2.76750117
## 73 74 75 76 77 78
## 0.32784250 -0.27291352 -4.58389159 -1.74284131 -2.20326744 -3.39495845
## 79 80 81 82 83 84
## -3.37123462 -4.72573535 -1.45881452 0.31068241 -2.44577712 -4.37200900
## 85 86 87 88 89 90
## -2.92607903 -2.01839960 -1.82194048 -5.40043585 -4.50289216 -2.18783550
## 91 92 93 94 95 96
## -2.18824973 -0.57459425 -0.40725491 -0.55436124 -0.88818027 0.66371190
## 97 98 99 100 101 102
## -0.46847380 -1.93330405 -0.26414720 0.74388595 -1.20902755 -1.29936840
## 103 104 105 106 107 108
## -1.09626404 -1.58846149 -2.38474422 -2.97933833 -1.59120550 -2.21673166
## 109 110 111 112 113 114
## -2.32335472 -1.90792669 -1.49521373 4.26986170 0.32458654 2.49027827
## 115 116 117 118 119 120
## 1.28550832 1.27197948 -1.37378525 2.03238373 -0.28776857 3.12978483
## 121 122 123 124 125 126
## -1.32828070 3.63213568 1.16881866 2.65353097 0.56752363 3.28031608
## 127 128 129 130 131 132
## 0.85255334 -2.13920427 -1.00514081 3.97345164 1.63779107 2.00121359
## 133 134 135 136 137 138
## -1.16013307 1.57380841 1.70751378 0.93868325 2.84148588 3.74476018
## 139 140 141 142 143 144
## 3.26672303 0.50371215 0.41707681 0.49508219 0.64273177 2.53977930
## 145 146 147 148 149 150
## 3.06008104 2.82054007 2.29989380 1.42532065 1.83639738 1.31101754
## 151 152 153 154 155 156
## 1.56799145 1.19553871 1.12382553 -1.36521818 -2.04010260 -1.30077805
## 157 158 159 160 161 162
## 2.13585655 2.59632886 2.85287973 1.96117628 -1.08245498 0.20376771
## 163 164 165 166 167 168
## -1.22960236 3.45680620 3.02467415 3.25982382 -0.01125866 -1.80889534
## 169 170 171 172 173 174
## -0.19892580 3.46447098 2.75595933 2.61187535 2.65044135 1.25649204
## 175 176 177 178 179 180
## 1.62332695 0.60417267 1.84389204 2.16565804 2.76622345 1.71521733
## 181 182 183 184 185 186
## 2.96201461 1.42738687 1.13555187 1.05614883 2.66760147 1.75667237
## 187 188 189 190 191 192
## 2.73472891 0.88989300 1.20471252 1.05298428 1.27562590 -0.55208717
## 193 194 195 196 197 198
## 0.23982821 -0.24168840 -0.40015694 -0.86622815 -1.01822335 3.67763681
## 199 200 201 202 203 204
## 4.04638045 3.90100613 1.87655223 1.02637839 1.16098692 1.25372983
## 205 206 207 208 209 210
## 1.97561964 1.53224216 1.50097177 0.78065553 -0.33216713 -1.30442453
## 211 212 213 214 215 216
## 0.47989340 1.59471732 1.37928899 1.80800819 1.14483716 1.91205799
## 217 218 219 220 221 222
## 1.59301260 1.63935462 1.15053046 -0.75145081 0.88191540 2.14091136
# Extremes of the classification index
max(LR_OS_SMOTE_Model_Indices)
## [1] 4.269862
min(LR_OS_SMOTE_Model_Indices)
## [1] -6.458738
##################################
# Combining the oversampled data with
# the model probabilities and
# the classification index
##################################
LR_OS_SMOTE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_SMOTE)
# Make sure the outcome is a factor so it can drive the colour scale
LR_OS_SMOTE_Model_Predictions$Class <- as.factor(LR_OS_SMOTE_Model_Predictions$Class)
# Attach the predicted probability and the linear predictor side by side
LR_OS_SMOTE_Model_Predictions$LR_OS_SMOTE_Prob <- LR_OS_SMOTE_Model_Probabilities
LR_OS_SMOTE_Model_Predictions$LR_OS_SMOTE_LP <- LR_OS_SMOTE_Model_Indices
# Tag every row with the name of the model that produced it
LR_OS_SMOTE_Model_Predictions$Label <- rep(
  "LR_OS_SMOTE",
  times = nrow(LR_OS_SMOTE_Model_Predictions)
)
##################################
# Drawing the probability curve
# from the consolidated model predictions
##################################
# Predicted probability against the classification index, with points
# coloured by observed class and a black line joining the predictions.
ggplot(data = LR_OS_SMOTE_Model_Predictions,
       mapping = aes(x = LR_OS_SMOTE_LP,
                     y = LR_OS_SMOTE_Prob,
                     colour = Class)) +
  geom_point(size = 5) +
  geom_line(colour = "black") +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(breaks = seq(-10, 5, by = 1), limits = c(-10, 5)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.1),
                     limits = c(0, 1),
                     labels = scales::percent) +
  labs(x = "Sonar Object Classification Index (Logit Values)",
       y = "Estimated Rock Detection Probability",
       colour = "Class",
       title = "Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_SMOTE)") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"))
##################################
# Visualizing the imbalanced data set
##################################
# Scatter of V1 against V11 for the data before oversampling,
# coloured by class, on fixed -4..4 axes.
ggplot(data = PMA_PreModelling_Train,
       mapping = aes(x = V1, y = V11, colour = Class)) +
  geom_point(size = 5) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  scale_y_continuous(breaks = seq(-4, 4, by = 1), limits = c(-4, 4)) +
  ggtitle("Without Oversampling - Random Oversampling Examples") +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"))
##################################
# Implementing OS_ROSE
# Visualizing the oversampled data using OS_ROSE
##################################
# Apply the ROSE step to the data and plot the result on the same
# -4..4 axes as the "without oversampling" scatter.
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_rose(Class, seed = 123456789) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  ggplot(aes(V1, V11, color = Class)) +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  geom_point(size = 5) +
  scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
  labs(title = "With Oversampling - Random Oversampling Examples") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
        axis.title.x = element_text(color = "black", size = 12, face = "bold"),
        axis.title.y = element_text(color = "black", size = 12, face = "bold"),
        legend.position = "top")

# The plot expression and the assignment below used to share one line
# ("...legend.position="top")OS_ROSE <- recipe(..."), which R cannot parse.
# They are now separate statements.
#
# Prepare the same recipe (same formula, same seed) and keep it, then bake
# it to obtain the oversampled data used for modelling.
OS_ROSE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_rose(Class, seed = 123456789) %>%
  prep()
PMA_PreModelling_Train_LR_OS_ROSE <- OS_ROSE %>%
  bake(new_data = NULL)
# Convert to a plain data frame; the outer parentheses print the result
(PMA_PreModelling_Train_LR_OS_ROSE <- as.data.frame(PMA_PreModelling_Train_LR_OS_ROSE))
## V1 V11 Class
## 1 0.453829274 -0.420221366 M
## 2 0.127920224 0.686476420 M
## 3 1.075595629 -0.235554656 M
## 4 2.137901379 -1.178271820 M
## 5 1.092110905 0.456926231 M
## 6 0.857402441 1.398298653 M
## 7 -1.907459325 -0.021176979 M
## 8 0.309616027 -0.332458449 M
## 9 0.693229730 -2.341819879 M
## 10 1.716214396 -0.555582217 M
## 11 -0.770935156 -0.187998880 M
## 12 -1.496837087 1.378598435 M
## 13 -0.106000306 1.534889807 M
## 14 -0.916802330 -1.457363172 M
## 15 0.157063822 0.775286340 M
## 16 -0.064112224 -1.717694803 M
## 17 -0.112461451 -0.079932050 M
## 18 0.125906565 -1.196069844 M
## 19 -0.946156465 0.057997744 M
## 20 -1.748418397 -0.678614715 M
## 21 0.690260404 0.140155995 M
## 22 -0.560768697 0.384716767 M
## 23 0.250861459 -0.498669324 M
## 24 -2.438990535 0.731196382 M
## 25 1.529501791 -1.522171442 M
## 26 2.558447121 1.985131848 M
## 27 -0.387603764 0.364725766 M
## 28 -0.961430471 -0.770858863 M
## 29 0.863038442 2.196168690 M
## 30 1.039157671 0.133492560 M
## 31 -1.697870849 -1.143428379 M
## 32 -0.610125646 -0.127814520 M
## 33 -0.749263527 -0.150861865 M
## 34 -0.279227167 0.078775816 M
## 35 0.194985084 0.683020200 M
## 36 0.210531378 1.454231491 M
## 37 -0.147592914 0.362929787 M
## 38 -0.146821909 -1.873438319 M
## 39 -0.268162038 0.392433523 M
## 40 1.087154437 -1.567477864 M
## 41 -3.258143427 -0.310093040 M
## 42 0.221639048 2.228731592 M
## 43 -1.339735538 1.934676727 M
## 44 1.124251223 2.209600237 M
## 45 -1.920302036 0.658912503 M
## 46 0.320103472 0.002252489 M
## 47 -0.373684831 0.377203482 M
## 48 0.416501911 0.873626212 M
## 49 -0.337904242 0.283054835 M
## 50 -0.488958504 0.548829476 M
## 51 0.837780447 0.174524037 M
## 52 -0.818636727 1.462313516 M
## 53 0.522647205 0.301663855 M
## 54 0.077677433 -0.395192354 M
## 55 0.808685352 0.089522951 M
## 56 0.121157987 -1.487662966 M
## 57 0.930609853 -0.001536725 M
## 58 1.606666273 -0.460572860 M
## 59 1.879030954 1.415353475 M
## 60 -0.259700391 1.886523777 M
## 61 -1.163566178 -0.350578696 M
## 62 -0.473028462 -0.152114025 M
## 63 -0.270091603 0.103561905 M
## 64 -0.381874467 1.008824831 M
## 65 0.108606139 2.879619917 M
## 66 1.200948103 -0.411763531 M
## 67 -0.143328184 -1.493160633 M
## 68 1.381475831 0.613253958 M
## 69 1.396345117 0.826444551 M
## 70 0.096154862 -1.106932315 M
## 71 0.697201801 0.765645056 M
## 72 0.280450179 -0.779498730 M
## 73 -0.819993172 0.582228816 M
## 74 0.113508607 1.089077908 M
## 75 0.324148150 1.032218252 M
## 76 1.331741998 -0.388969403 M
## 77 -0.504012381 -0.624175476 M
## 78 -0.278104210 0.190564663 M
## 79 1.514692613 0.102141162 M
## 80 -1.332434679 0.332392981 M
## 81 0.748018715 2.546676685 M
## 82 -1.401265796 0.653245938 M
## 83 -3.555046462 -0.437014632 M
## 84 -0.797376264 0.590342435 M
## 85 -1.153805197 0.874627800 M
## 86 1.207193590 -0.900335700 M
## 87 -0.892535628 -0.053046654 M
## 88 0.025673058 -0.103210450 M
## 89 0.708863775 0.305835422 M
## 90 -0.479304016 -0.828521934 M
## 91 0.531855505 -1.020076195 M
## 92 0.843587712 -0.127072250 M
## 93 0.321447635 -0.242383581 M
## 94 -0.104688089 -0.955018789 M
## 95 -1.223682265 -0.993723911 M
## 96 -0.060035584 0.193030555 M
## 97 -0.184412189 -0.355696298 M
## 98 0.739698901 -0.008225890 M
## 99 1.751934661 0.060003736 M
## 100 0.012753639 -0.683233510 M
## 101 0.704272106 1.556907557 M
## 102 0.349253294 -1.364315460 M
## 103 -0.054249920 -0.279637524 M
## 104 0.422806210 1.856196052 M
## 105 -0.378916586 -1.774888727 M
## 106 0.896085827 -0.207757879 M
## 107 0.724946782 -1.974278177 M
## 108 1.177536942 1.427597354 M
## 109 0.417306877 0.674306556 M
## 110 0.119668249 0.770502124 M
## 111 -0.295729614 -1.403169748 R
## 112 -0.003104477 -1.303129281 R
## 113 -1.119225438 0.383400942 R
## 114 -1.205336093 -0.269591889 R
## 115 -0.031385116 -0.727154048 R
## 116 -0.262140197 -2.421432946 R
## 117 -0.004372316 -0.720426193 R
## 118 -0.652857082 -0.170061116 R
## 119 -0.243474578 0.765583966 R
## 120 -0.862558574 -2.394683708 R
## 121 -0.084987329 -0.315983611 R
## 122 0.335663272 -1.450942265 R
## 123 -0.547564677 -1.300644608 R
## 124 -0.085770445 -0.386014589 R
## 125 1.158708949 0.194640216 R
## 126 0.838758747 0.838624142 R
## 127 0.129363261 -1.283480849 R
## 128 1.185903335 0.067746005 R
## 129 -0.411610808 -1.398984345 R
## 130 -1.584645597 -3.066092331 R
## 131 -1.195552409 -0.055875719 R
## 132 -2.077490954 -1.016881169 R
## 133 1.123569804 -1.294911355 R
## 134 -1.696277599 1.577388230 R
## 135 -2.606714342 -2.430876734 R
## 136 -0.936576706 -0.967689409 R
## 137 -0.653422361 -0.737952271 R
## 138 1.399478608 -2.336802156 R
## 139 -0.439107621 -0.751356400 R
## 140 0.682169711 -0.645106739 R
## 141 -0.391226812 -1.228763737 R
## 142 -0.175404459 -1.171822064 R
## 143 0.149961923 -0.632053488 R
## 144 -1.306586950 0.989132759 R
## 145 0.192756837 -1.588485717 R
## 146 -1.637705898 -1.115797066 R
## 147 -1.035313367 -0.044642241 R
## 148 -0.454662737 0.327593121 R
## 149 -0.398721880 -2.531360086 R
## 150 -0.521649915 -1.203285944 R
## 151 -1.036925438 -1.419544783 R
## 152 -0.766892871 -1.525224785 R
## 153 -2.249201887 -1.166084343 R
## 154 -0.868992543 0.374276284 R
## 155 -3.122277505 -1.914384957 R
## 156 -1.711225949 0.095212802 R
## 157 0.846502796 -0.576583436 R
## 158 1.436542798 -0.686131512 R
## 159 -0.842933030 -0.712232667 R
## 160 -3.122732053 -1.680571529 R
## 161 0.635603354 -1.077316478 R
## 162 -0.654996459 -2.908798364 R
## 163 0.418427304 -0.144379560 R
## 164 -1.041169997 -1.922001515 R
## 165 -0.441540025 -0.259070590 R
## 166 -1.655681734 0.576523808 R
## 167 0.953177477 -1.711512314 R
## 168 -3.210760176 -1.284814307 R
## 169 -1.217750244 -1.591163354 R
## 170 0.259282153 -0.574268729 R
## 171 -0.921207702 -0.875473417 R
## 172 -0.865572661 -2.663360159 R
## 173 -0.947803456 -0.948766125 R
## 174 -0.460461844 0.286061177 R
## 175 0.419542632 -1.381617010 R
## 176 -0.621694247 -1.065107860 R
## 177 0.222233446 -0.308422959 R
## 178 -0.643581074 0.498325789 R
## 179 -0.260344829 0.290918714 R
## 180 -1.089617028 1.086009599 R
## 181 -0.474076557 -1.474554870 R
## 182 -1.113328689 -0.745280710 R
## 183 -1.367306542 -1.446652698 R
## 184 -0.227162341 -1.792986420 R
## 185 0.869250328 0.385747269 R
## 186 -0.828656357 1.245036664 R
## 187 -0.712628500 -2.041821701 R
## 188 -0.314242395 -0.861387324 R
## 189 -1.931922510 -1.283036428 R
## 190 -1.152889515 0.238829964 R
## 191 -1.757284170 -0.475428583 R
## 192 0.091169879 -0.475918031 R
## 193 0.853052045 -0.195251645 R
## 194 -1.030858895 -1.444796537 R
## 195 -0.458061630 -2.392889498 R
## 196 -0.757335077 -0.976940053 R
## 197 -1.825122741 -1.449896879 R
## 198 0.548521112 -1.625042438 R
## 199 0.815956664 0.127893714 R
## 200 -0.802403884 -1.389200876 R
## 201 -0.504679908 0.215394678 R
## 202 -0.828050004 0.276042407 R
## 203 -0.015700504 -1.362342936 R
## 204 -1.790699584 1.276762042 R
## 205 -0.319616516 -0.271949278 R
## 206 -0.497131472 -0.653931589 R
## 207 -0.434923007 -1.431687637 R
## 208 -2.126603097 -1.447703954 R
## 209 -1.508349941 -1.887116236 R
## 210 -2.644025006 -1.683878336 R
## 211 -1.292563792 0.557925461 R
## 212 0.956404116 0.448577768 R
## 213 0.851149954 -0.762481606 R
## 214 -0.031079759 -1.607449284 R
## 215 -0.131473949 -1.989738433 R
## 216 -0.987534049 -1.880077457 R
## 217 1.350301010 -0.132513674 R
## 218 -1.488226564 -1.509045712 R
## 219 1.732090761 -1.215001202 R
## 220 -1.010449867 -0.143822342 R
## 221 -0.443012681 -2.279874936 R
## 222 -0.918668643 -1.722236931 R
# Tagging every row with the sampling-method label
# (the label is used later as the facet variable in the plots)
PMA_PreModelling_Train_LR_OS_ROSE[["Label"]] <- rep_len(
  "LR_OS_ROSE",
  nrow(PMA_PreModelling_Train_LR_OS_ROSE)
)
##################################
# Checking the class counts of the
# train set oversampled using OS_ROSE
##################################
table(PMA_PreModelling_Train_LR_OS_ROSE[["Class"]])
##
## M R
## 110 112
##################################
# Fitting the Logistic Regression model
# (binomial GLM of Class on V1 and V11)
# on the OS_ROSE train set
##################################
LR_OS_ROSE_Model <- glm(
  formula = Class ~ V1 + V11,
  family = binomial,
  data = PMA_PreModelling_Train_LR_OS_ROSE
)
##################################
# Consolidating the model results
##################################
summary(LR_OS_ROSE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_ROSE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9858 -0.9400 0.2881 0.9195 1.9172
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4555 0.1701 -2.678 0.007398 **
## V1 -0.5284 0.1593 -3.317 0.000908 ***
## V11 -0.9132 0.1650 -5.535 3.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.74 on 221 degrees of freedom
## Residual deviance: 249.98 on 219 degrees of freedom
## AIC: 255.98
##
## Number of Fisher Scoring iterations: 4
# Collecting the fitted coefficients into a data frame with one row per
# model term. The term names are kept both as row names (inherited from
# the named coefficient vector) and in the Coefficients column.
LR_OS_ROSE_Model_Coef <- data.frame(
  Estimates = LR_OS_ROSE_Model$coefficients,
  Coefficients = names(LR_OS_ROSE_Model$coefficients),
  Model = "LR_OS_ROSE",
  stringsAsFactors = FALSE
)
# print.data.frame() honors `row.names`, not `rownames`; suppressing the
# row names avoids showing each term name twice.
print(LR_OS_ROSE_Model_Coef, row.names = FALSE)
## Estimates Coefficients Model
## -0.4555392 (Intercept) LR_OS_ROSE
## -0.5283578 V1 LR_OS_ROSE
## -0.9131847 V11 LR_OS_ROSE
##################################
# Computing the fitted probabilities (response scale)
# for the rows the model was trained on;
# the outer parentheses auto-print the assigned vector
##################################
(LR_OS_ROSE_Model_Probabilities <- predict(
  object = LR_OS_ROSE_Model,
  type = "response"
))
## 1 2 3 4 5 6 7
## 0.42272822 0.24048787 0.30816041 0.37539602 0.19002887 0.10106385 0.63913697
## 8 9 10 11 12 13 14
## 0.42176519 0.78863241 0.29839231 0.53082787 0.28422724 0.14170857 0.79571130
## 15 16 17 18 19 20 21
## 0.22330577 0.75894563 0.41991396 0.63880235 0.49785184 0.74799586 0.27924052
## 22 23 24 25 26 27 28
## 0.37505987 0.46687195 0.54125677 0.53154810 0.02608136 0.35805690 0.68056655
## 29 30 31 32 33 34 35
## 0.05131761 0.24480950 0.81543281 0.49588602 0.51951627 0.40614035 0.23464164
## 36 37 38 39 40 41 42
## 0.13070302 0.32982442 0.79130846 0.33800037 0.59903625 0.82478350 0.06863251
## 43 44 45 46 47 48 49
## 0.18028811 0.04447561 0.48934125 0.34825343 0.35375905 0.18642890 0.36923822
## 50 51 52 53 54 55 56
## 0.33217220 0.25777606 0.20450560 0.26753625 0.46612769 0.27596505 0.69824553
## 57 58 59 60 61 62 63
## 0.27972403 0.29238147 0.06060725 0.11495790 0.61760221 0.48333050 0.39953443
## 64 65 66 67 68 69 70
## 0.23594946 0.04138595 0.32870381 0.72784589 0.14861986 0.12477099 0.62351399
## 71 72 73 74 75 76 77
## 0.17900745 0.52700098 0.36494339 0.18093261 0.17230032 0.30917596 0.59405330
## 78 79 80 81 82 83 84
## 0.38163331 0.20601548 0.48623520 0.04006559 0.42269894 0.86079066 0.36046859
## 85 86 87 88 89 90 91
## 0.34420429 0.43261336 0.51611451 0.40736875 0.24799075 0.63513227 0.54858837
## 92 93 94 95 96 97 98
## 0.31319703 0.40034265 0.61582913 0.74997077 0.35432276 0.49167898 0.30177638
## 99 100 101 102 103 104 105
## 0.19216831 0.54032242 0.09540200 0.64698261 0.45722602 0.08517833 0.79664657
## 106 107 108 109 110 111 112
## 0.32316339 0.72398394 0.08460516 0.21554948 0.22751830 0.72751777 0.67614219
## 113 114 115 116 117 118 119
## 0.44662821 0.60527575 0.55603100 0.86923142 0.55098545 0.51117317 0.26385700
## 120 121 122 123 124 125 126
## 0.89907501 0.46951683 0.66643143 0.73526461 0.48557423 0.22348101 0.15915581
## 127 128 129 130 131 132 133
## 0.65659701 0.24158944 0.73874767 0.96013708 0.55654831 0.82788619 0.53327752
## 134 135 136 137 138 139 140
## 0.26899165 0.95857559 0.71565046 0.63728241 0.71889102 0.61362934 0.44352519
## 141 142 143 144 145 146 147
## 0.70542292 0.66978816 0.51060062 0.33884294 0.70954950 0.80669624 0.53301273
## 148 149 150 151 152 153 154
## 0.37414679 0.88762701 0.71481811 0.80037425 0.79289528 0.85786807 0.41625077
## 155 156 157 158 159 160 161
## 0.94989962 0.58944065 0.40702723 0.35709778 0.65480568 0.93871938 0.54795805
## 162 163 164 165 166 167 168
## 0.92735848 0.36707788 0.86409346 0.50358274 0.47322076 0.64651810 0.91790101
## 169 170 171 172 173 174 175
## 0.83765665 0.48297670 0.69649384 0.91937851 0.71333541 0.38379354 0.64209386
## 176 177 178 179 180 181 182
## 0.69963871 0.42768298 0.36110724 0.35809396 0.29493003 0.75795197 0.69280709
## 183 184 185 186 187 188 189
## 0.83032735 0.78613971 0.21975873 0.23963883 0.85638009 0.62177733 0.85028733
## 190 191 192 193 194 195 196
## 0.48388136 0.71240426 0.48272976 0.32564362 0.80352755 0.87778623 0.69777637
## 197 198 199 200 201 202 203
## 0.86209523 0.67669167 0.26826426 0.77504386 0.40478111 0.43288003 0.68929534
## 204 205 206 207 208 209 210
## 0.33730469 0.49041932 0.59971660 0.74680630 0.87975259 0.88742407 0.92266675
## 211 212 213 214 215 216 217
## 0.42994096 0.20253953 0.44794817 0.73667934 0.80704537 0.85607802 0.25961475
## 218 219 220 221 222
## 0.84668245 0.43507308 0.55222785 0.86535253 0.83237685
##################################
# Creating a classification index
# based on the model predictions
# (fitted values on the link scale);
# the outer parentheses auto-print the assigned vector
##################################
(LR_OS_ROSE_Model_Indices <- predict(
  object = LR_OS_ROSE_Model,
  type = "link"
))
## 1 2 3 4 5 6
## -0.311583732 -1.150006656 -0.808733670 -0.509136303 -1.449822617 -2.185459486
## 7 8 9 10 11 12
## 0.571620329 -0.315531291 1.316701585 -0.854965324 0.123468107 -0.923588677
## 13 14 15 16 17 18
## -1.801171064 1.359702258 -1.246504767 1.146907641 -0.323126605 0.570169780
## 19 20 21 22 23 24
## -0.008592705 1.087951913 -0.948232019 -0.510570169 -0.132706621 0.165403137
## 25 26 27 28 29 30
## 0.126360262 -3.620106864 -0.583807739 0.756376634 -2.917040046 -1.126489672
## 31 32 33 34 35 36
## 1.485705461 -0.016456294 0.078104777 -0.379944235 -1.182284732 -1.894757113
## 37 38 39 40 41 42
## -0.708979289 1.332830551 -0.672218011 0.401451079 1.549098576 -2.607887604
## 43 44 45 46 47 48
## -1.514396714 -3.067319346 -0.042641454 -0.626725333 -0.602556377 -1.473383381
## 49 50 51 52 53 54
## -0.535486224 -0.698376867 -1.057559759 -1.358368477 -1.007158785 -0.135697076
## 55 56 57 58 59 60
## -0.964565444 0.838957115 -0.945830902 -0.883845811 -2.740819106 -2.041069195
## 61 62 63 64 65 66
## 0.479383185 -0.066702727 -0.407405360 -1.175016290 -3.142547062 -0.714053376
## 67 68 69 70 71 72
## 0.983720838 -1.745466933 -1.948005631 0.504490295 -1.523086620 0.108109072
## 73 74 75 76 77 78
## -0.553971878 -1.510041697 -1.569411378 -0.803974603 0.380747178 -0.482621426
## 79 80 81 82 83 84
## -1.349112661 -0.055073129 -3.176347023 -0.311703691 1.821872477 -0.573330931
## 85 86 87 88 89 90
## -0.644613971 -0.271196583 0.064480356 -0.374853574 -1.109357179 0.554298385
## 91 92 93 94 95 96
## 0.194968769 -0.785214949 -0.404037608 0.471882126 1.098456379 -0.600091505
## 97 98 99 100 101 102
## -0.033287169 -0.838853164 -1.435982099 0.161640704 -2.249391106 0.605802116
## 103 104 105 106 107 108
## -0.171515133 -2.373982082 1.365465607 -0.739271854 0.964310162 -2.381360181
## 109 110 111 112 113 114
## -1.291793024 -1.222377651 0.982065023 0.736098817 -0.214303591 0.427496730
## 115 116 117 118 119 120
## 0.225069325 1.894180197 0.204653127 0.044700141 -1.026017110 2.186988947
## 121 122 123 124 125 126
## -0.122084091 0.692088786 1.021499657 -0.057719106 -1.245494630 -1.664522726
## 127 128 129 130 131 132
## 0.648165803 -1.143985141 1.039469713 3.181629377 0.227165101 1.570719734
## 133 134 135 136 137 138
## 0.133307162 -0.999744528 3.141578211 0.922987602 0.563588342 0.938967358
## 139 140 141 142 143 144
## 0.462593918 -0.226867299 0.873256809 0.707227114 0.042408819 -0.668454716
## 145 146 147 148 149 150
## 0.893197099 1.428684346 0.132243309 -0.514467642 2.066727983 0.918900944
## 151 152 153 154 155 156
## 1.388635067 1.342466612 1.797694609 -0.338183600 2.942327636 0.361653519
## 157 158 159 160 161 162
## -0.376268405 -0.587983024 0.640231037 2.729052948 0.192423733 2.546803534
## 163 164 165 166 167 168
## -0.544773350 1.849713529 0.014331213 -0.107219562 0.603768915 2.414163843
## 169 170 171 172 173 174
## 1.640894726 -0.068119539 0.830657032 2.433932696 0.911638889 -0.473477302
## 175 176 177 178 179 180
## 0.584463706 0.845578033 -0.291310863 -0.570561627 -0.583646521 -0.871558923
## 181 182 183 184 185 186
## 1.141483829 0.813275667 1.587949042 1.301811601 -1.267072947 -1.154660623
## 187 188 189 190 191 192
## 1.785544023 0.497098959 1.736856426 -0.064496902 0.907089741 -0.069108460
## 193 194 195 196 197 198
## -0.727955121 1.408489278 1.971631378 0.836731432 1.832802349 0.738609301
## 199 200 201 202 203 204
## -1.003446893 1.237014177 -0.385582774 -0.270110234 0.796827032 -0.675328688
## 205 206 207 208 209 210
## -0.038327405 0.404284424 1.081651042 1.990089307 2.064695003 2.479144060
## 211 212 213 214 215 216
## -0.282092041 -1.370497185 -0.208964397 1.028780156 1.430924825 1.783090147
## 217 218 219 220 221 222
## -1.047971859 1.708814429 -0.261182377 0.209676238 1.860476975 1.602567012
# Reporting the largest and smallest values of the classification index
# (the link-scale predictions computed above)
max(LR_OS_ROSE_Model_Indices)## [1] 3.181629
min(LR_OS_ROSE_Model_Indices)## [1] -3.620107
##################################
# Building the prediction table: the OS_ROSE train set
# plus the fitted probabilities (LR_OS_ROSE_Prob)
# and the link-scale classification index (LR_OS_ROSE_LP)
##################################
LR_OS_ROSE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_ROSE)
LR_OS_ROSE_Model_Predictions[["LR_OS_ROSE_Prob"]] <- LR_OS_ROSE_Model_Probabilities
LR_OS_ROSE_Model_Predictions[["LR_OS_ROSE_LP"]] <- LR_OS_ROSE_Model_Indices
# Making sure the outcome is a factor and every row carries the method label
LR_OS_ROSE_Model_Predictions[["Class"]] <- as.factor(
  LR_OS_ROSE_Model_Predictions[["Class"]]
)
LR_OS_ROSE_Model_Predictions[["Label"]] <- rep_len(
  "LR_OS_ROSE",
  nrow(LR_OS_ROSE_Model_Predictions)
)
##################################
# Plotting the estimated probability against the
# classification index (logit values) for the
# consolidated LR_OS_ROSE model predictions
##################################
ggplot(
  data = LR_OS_ROSE_Model_Predictions,
  mapping = aes(
    x = LR_OS_ROSE_LP,
    y = LR_OS_ROSE_Prob,
    colour = Class
  )
) +
  # Points are coloured by class; the connecting curve is a constant black
  geom_point(size = 5) +
  geom_line(colour = "black") +
  scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
  scale_x_continuous(
    limits = c(-10, 5),
    breaks = seq(-10, 5, by = 1)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, by = 0.1),
    labels = scales::percent
  ) +
  labs(
    title = "Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_ROSE)",
    x = "Sonar Object Classification Index (Logit Values)",
    y = "Estimated Rock Detection Probability",
    colour = "Class"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_text(color = "black", size = 12, face = "bold"),
    legend.position = "top"
  )
##################################
# Visualizing the imbalanced data set
# Visualizing the undersampled and oversampled data
##################################
# Builds the V1-versus-V11 scatter plot shared by all nine panels.
#   train_set       : data frame with columns V1, V11, Class and Label
#   axis_title_size : font size applied to both axis titles
# Returns a ggplot object with points coloured by Class and faceted by Label.
Plot_ClassDistribution <- function(train_set, axis_title_size = 12) {
  stopifnot(all(c("V1", "V11", "Class", "Label") %in% names(train_set)))
  train_set %>%
    ggplot(aes(x = V1,
               y = V11,
               color = Class)) +
    scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
    geom_point(size = 5) +
    # Fixed limits on both axes so the panels are directly comparable
    scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
    scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
    theme_bw() +
    facet_grid(. ~ Label) +
    theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
          axis.title.x = element_text(color = "black", size = axis_title_size, face = "bold"),
          axis.title.y = element_text(color = "black", size = axis_title_size, face = "bold"),
          legend.position = "top")
}
# One panel per train set; the first panel uses axis titles of size 10,
# the remaining panels use the helper's default of 12
LR_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR,
                                               axis_title_size = 10)
LR_US_DOWNSAMPLE_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_US_DOWNSAMPLE)
LR_OS_UPSAMPLE_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_OS_UPSAMPLE)
LR_US_NEARMISS_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_US_NEARMISS)
LR_US_TOMEK_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_US_TOMEK)
LR_OS_ADASYN_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_OS_ADASYN)
LR_OS_BSMOTE_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_OS_BSMOTE)
LR_OS_SMOTE_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_OS_SMOTE)
LR_OS_ROSE_ClassDistribution <- Plot_ClassDistribution(PMA_PreModelling_Train_LR_OS_ROSE)
# Arranging the nine panels in a 3 x 3 grid under a common title
RDD_ClassDistribution <- ggarrange(LR_ClassDistribution,
                                   LR_US_DOWNSAMPLE_ClassDistribution,
                                   LR_OS_UPSAMPLE_ClassDistribution,
                                   LR_US_NEARMISS_ClassDistribution,
                                   LR_US_TOMEK_ClassDistribution,
                                   LR_OS_ADASYN_ClassDistribution,
                                   LR_OS_BSMOTE_ClassDistribution,
                                   LR_OS_SMOTE_ClassDistribution,
                                   LR_OS_ROSE_ClassDistribution,
                                   ncol = 3, nrow = 3)
annotate_figure(RDD_ClassDistribution,
                top = text_grob("Class Distribution",
                                color = "black",
                                face = "bold",
                                size = 14))
##################################
# Replotting the logistic curves
##################################
# Builds the probability-versus-index curve shared by all nine panels.
#   predictions : data frame with columns <prefix>_LP, <prefix>_Prob,
#                 Class and Label
#   prefix      : model prefix used in the column names (e.g. "LR_OS_ROSE")
# Returns a ggplot object with points coloured by Class, a black connecting
# line, and facets by Label.
Plot_LogisticCurve <- function(predictions, prefix) {
  lp_col <- paste0(prefix, "_LP")
  prob_col <- paste0(prefix, "_Prob")
  stopifnot(all(c(lp_col, prob_col, "Class", "Label") %in% names(predictions)))
  predictions %>%
    # The .data pronoun looks the columns up by their string names
    ggplot(aes(x = .data[[lp_col]],
               y = .data[[prob_col]],
               color = Class)) +
    scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
    geom_point(size = 5) +
    geom_line(color = "black") +
    xlab("Sonar Object Classification Index (Logit Values)") +
    ylab("Estimated Rock Detection Probability") +
    labs(color = "Class") +
    # Fixed limits on both axes so the panels are directly comparable
    scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.1), labels = scales::percent) +
    facet_grid(. ~ Label) +
    theme_bw() +
    theme(plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.50),
          axis.title.x = element_text(color = "black", size = 10, face = "bold"),
          axis.title.y = element_text(color = "black", size = 10, face = "bold"),
          legend.position = "top")
}
# One panel per fitted model
LR_LogisticCurvePlot <- Plot_LogisticCurve(LR_Model_Predictions, "LR")
LR_US_DOWNSAMPLE_LogisticCurvePlot <- Plot_LogisticCurve(LR_US_DOWNSAMPLE_Model_Predictions, "LR_US_DOWNSAMPLE")
LR_OS_UPSAMPLE_LogisticCurvePlot <- Plot_LogisticCurve(LR_OS_UPSAMPLE_Model_Predictions, "LR_OS_UPSAMPLE")
LR_US_NEARMISS_LogisticCurvePlot <- Plot_LogisticCurve(LR_US_NEARMISS_Model_Predictions, "LR_US_NEARMISS")
LR_US_TOMEK_LogisticCurvePlot <- Plot_LogisticCurve(LR_US_TOMEK_Model_Predictions, "LR_US_TOMEK")
LR_OS_ADASYN_LogisticCurvePlot <- Plot_LogisticCurve(LR_OS_ADASYN_Model_Predictions, "LR_OS_ADASYN")
LR_OS_BSMOTE_LogisticCurvePlot <- Plot_LogisticCurve(LR_OS_BSMOTE_Model_Predictions, "LR_OS_BSMOTE")
LR_OS_SMOTE_LogisticCurvePlot <- Plot_LogisticCurve(LR_OS_SMOTE_Model_Predictions, "LR_OS_SMOTE")
LR_OS_ROSE_LogisticCurvePlot <- Plot_LogisticCurve(LR_OS_ROSE_Model_Predictions, "LR_OS_ROSE")
# Arranging the nine panels in a 3 x 3 grid under a common title
RLR_LogisticCurvePlot <- ggarrange(LR_LogisticCurvePlot,
                                   LR_US_DOWNSAMPLE_LogisticCurvePlot,
                                   LR_OS_UPSAMPLE_LogisticCurvePlot,
                                   LR_US_NEARMISS_LogisticCurvePlot,
                                   LR_US_TOMEK_LogisticCurvePlot,
                                   LR_OS_ADASYN_LogisticCurvePlot,
                                   LR_OS_BSMOTE_LogisticCurvePlot,
                                   LR_OS_SMOTE_LogisticCurvePlot,
                                   LR_OS_ROSE_LogisticCurvePlot,
                                   ncol = 3, nrow = 3)
annotate_figure(RLR_LogisticCurvePlot,
                top = text_grob("Estimated Rock Detection Probabilities Based on Classification Index",
                                color = "black",
                                face = "bold",
                                size = 14))